blob: 770f8463dd23370a18863989c8cb8627ed92f965 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
Daniel Veillard3c5ed912002-01-08 10:36:16 +000041#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000042#define XML_DIR_SEP '\\'
43#else
Owen Taylor3473f882001-02-23 17:55:21 +000044#define XML_DIR_SEP '/'
45#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
86#ifdef HAVE_ZLIB_H
87#include <zlib.h>
88#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020089#ifdef HAVE_LZMA_H
90#include <lzma.h>
91#endif
Owen Taylor3473f882001-02-23 17:55:21 +000092
Daniel Veillard768eb3b2012-07-16 14:19:49 +080093#include "buf.h"
94#include "enc.h"
95
Daniel Veillard0161e632008-08-28 15:36:32 +000096static void
97xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
98
Rob Richards9c0aa472009-03-26 18:10:19 +000099static xmlParserCtxtPtr
100xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101 const xmlChar *base, xmlParserCtxtPtr pctx);
102
Daniel Veillard28cd9cb2015-11-20 14:55:30 +0800103static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
Daniel Veillard0161e632008-08-28 15:36:32 +0000105/************************************************************************
106 * *
107 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
108 * *
109 ************************************************************************/
110
111#define XML_PARSER_BIG_ENTITY 1000
112#define XML_PARSER_LOT_ENTITY 5000
113
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
120#define XML_PARSER_NON_LINEAR 10
121
122/*
123 * xmlParserEntityCheck
124 *
125 * Function to check non-linear entity expansion behaviour
126 * This is here to detect and stop exponential linear entity expansion
127 * This is not a limitation of the parser but a safety
128 * boundary feature. It can be disabled with the XML_PARSE_HUGE
129 * parser option.
130 */
131static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800132xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000134{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800135 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000136
137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
138 return (0);
139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
140 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800141
142 /*
143 * This may look absurd but is needed to detect
144 * entities problems
145 */
146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800147 (ent->content != NULL) && (ent->checked == 0) &&
148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149 unsigned long oldnbent = ctxt->nbentities;
150 xmlChar *rep;
151
152 ent->checked = 1;
153
Peter Simons8f30bdf2016-04-15 11:56:55 +0200154 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800155 rep = xmlStringDecodeEntities(ctxt, ent->content,
156 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200157 --ctxt->depth;
Daniel Veillardbdd66182016-05-23 12:27:58 +0800158 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
159 ent->content[0] = 0;
160 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800161
162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
163 if (rep != NULL) {
164 if (xmlStrchr(rep, '<'))
165 ent->checked |= 1;
166 xmlFree(rep);
167 rep = NULL;
168 }
169 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800170 if (replacement != 0) {
171 if (replacement < XML_MAX_TEXT_LENGTH)
172 return(0);
173
174 /*
175 * If the volume of entity copy reaches 10 times the
176 * amount of parsed data and over the large text threshold
177 * then that's very likely to be an abuse.
178 */
179 if (ctxt->input != NULL) {
180 consumed = ctxt->input->consumed +
181 (ctxt->input->cur - ctxt->input->base);
182 }
183 consumed += ctxt->sizeentities;
184
185 if (replacement < XML_PARSER_NON_LINEAR * consumed)
186 return(0);
187 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000188 /*
189 * Do the check based on the replacement size of the entity
190 */
191 if (size < XML_PARSER_BIG_ENTITY)
192 return(0);
193
194 /*
195 * A limit on the amount of text data reasonably used
196 */
197 if (ctxt->input != NULL) {
198 consumed = ctxt->input->consumed +
199 (ctxt->input->cur - ctxt->input->base);
200 }
201 consumed += ctxt->sizeentities;
202
203 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
205 return (0);
206 } else if (ent != NULL) {
207 /*
208 * use the number of parsed entities in the replacement
209 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800210 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212 /*
213 * The amount of data parsed counting entities size only once
214 */
215 if (ctxt->input != NULL) {
216 consumed = ctxt->input->consumed +
217 (ctxt->input->cur - ctxt->input->base);
218 }
219 consumed += ctxt->sizeentities;
220
221 /*
222 * Check the density of entities for the amount of data
223 * knowing an entity reference will take at least 3 bytes
224 */
225 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
226 return (0);
227 } else {
228 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800229 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800231 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233 (ctxt->nbentities <= 10000))
234 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000235 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000236 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
237 return (1);
238}
239
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents we accept to
 * process. It is a safety boundary rather than a limitation of the
 * parser itself, and it can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000249
Daniel Veillard0fb18932003-09-07 09:14:37 +0000250
Daniel Veillard0161e632008-08-28 15:36:32 +0000251
252#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000253#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000254#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000255#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
256
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
266#define XML_PARSER_CHUNK_SIZE 100
267
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model",
    NULL
};
277
Daniel Veillarda07050d2003-10-19 14:46:32 +0000278
Owen Taylor3473f882001-02-23 17:55:21 +0000279/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200280static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard7d515752003-09-26 19:12:37 +0000283static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000284xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000286 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000287 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000288
Daniel Veillard37334572008-07-31 08:20:02 +0000289static int
290xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000292#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000293static void
294xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000296#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000297
Daniel Veillard7d515752003-09-26 19:12:37 +0000298static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000299xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000301
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000302static int
303xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
304
Daniel Veillarde57ec792003-09-10 10:50:59 +0000305/************************************************************************
306 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800307 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 * *
309 ************************************************************************/
310
311/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 * xmlErrAttributeDup:
313 * @ctxt: an XML parser context
314 * @prefix: the attribute prefix
315 * @localname: the attribute localname
316 *
317 * Handle a redefinition of attribute error
318 */
319static void
320xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321 const xmlChar * localname)
322{
Daniel Veillard157fee02003-10-31 10:36:03 +0000323 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324 (ctxt->instate == XML_PARSER_EOF))
325 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000326 if (ctxt != NULL)
327 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200328
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000329 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 (const char *) localname, NULL, NULL, 0, 0,
333 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000334 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000335 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200336 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 (const char *) prefix, (const char *) localname,
338 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
339 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000340 if (ctxt != NULL) {
341 ctxt->wellFormed = 0;
342 if (ctxt->recovery == 0)
343 ctxt->disableSAX = 1;
344 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345}
346
347/**
348 * xmlFatalErr:
349 * @ctxt: an XML parser context
350 * @error: the error number
351 * @extra: extra information string
352 *
353 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
354 */
355static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357{
358 const char *errmsg;
359
Daniel Veillard157fee02003-10-31 10:36:03 +0000360 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361 (ctxt->instate == XML_PARSER_EOF))
362 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 switch (error) {
364 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 errmsg = "internal error";
375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800437 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Fragment not allowed";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800461 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800464 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000473 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800474 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000475 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800477 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 break;
479 case XML_ERR_CONDSEC_INVALID_KEYWORD:
480 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800502 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800505 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000506 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000507 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800508 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000509 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800511 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000512 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000513 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000525 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000528 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000530 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000531 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800532 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800534 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800535 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800536 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000537#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000538 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800539 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000540 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000541#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800543 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000544 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000545 if (ctxt != NULL)
546 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800547 if (info == NULL) {
548 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
550 errmsg);
551 } else {
552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
554 errmsg, info);
555 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000556 if (ctxt != NULL) {
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
560 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000561}
562
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000563/**
564 * xmlFatalErrMsg:
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
568 *
569 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
570 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800571static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000572xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
573 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574{
Daniel Veillard157fee02003-10-31 10:36:03 +0000575 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576 (ctxt->instate == XML_PARSER_EOF))
577 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000578 if (ctxt != NULL)
579 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000580 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200581 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000582 if (ctxt != NULL) {
583 ctxt->wellFormed = 0;
584 if (ctxt->recovery == 0)
585 ctxt->disableSAX = 1;
586 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000587}
588
589/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000590 * xmlWarningMsg:
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
594 * @str1: extra data
595 * @str2: extra data
596 *
597 * Handle a warning.
598 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800599static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000600xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601 const char *msg, const xmlChar *str1, const xmlChar *str2)
602{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000603 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000604
Daniel Veillard157fee02003-10-31 10:36:03 +0000605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
607 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000608 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000610 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200611 if (ctxt != NULL) {
612 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000613 (ctxt->sax) ? ctxt->sax->warning : NULL,
614 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200619 } else {
620 __xmlRaiseError(schannel, NULL, NULL,
621 ctxt, NULL, XML_FROM_PARSER, error,
622 XML_ERR_WARNING, NULL, 0,
623 (const char *) str1, (const char *) str2, NULL, 0, 0,
624 msg, (const char *) str1, (const char *) str2);
625 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000626}
627
628/**
629 * xmlValidityError:
630 * @ctxt: an XML parser context
631 * @error: the error number
632 * @msg: the error message
633 * @str1: extra data
634 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000635 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000636 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800637static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000638xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000639 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000640{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000641 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000642
643 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644 (ctxt->instate == XML_PARSER_EOF))
645 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000646 if (ctxt != NULL) {
647 ctxt->errNo = error;
648 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649 schannel = ctxt->sax->serror;
650 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200651 if (ctxt != NULL) {
652 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000653 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000654 ctxt, NULL, XML_FROM_DTD, error,
655 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000656 (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000658 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200659 } else {
660 __xmlRaiseError(schannel, NULL, NULL,
661 ctxt, NULL, XML_FROM_DTD, error,
662 XML_ERR_ERROR, NULL, 0, (const char *) str1,
663 (const char *) str2, NULL, 0, 0,
664 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000665 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000666}
667
668/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000669 * xmlFatalErrMsgInt:
670 * @ctxt: an XML parser context
671 * @error: the error number
672 * @msg: the error message
673 * @val: an integer value
674 *
675 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
676 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800677static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000678xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000679 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000680{
Daniel Veillard157fee02003-10-31 10:36:03 +0000681 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682 (ctxt->instate == XML_PARSER_EOF))
683 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000684 if (ctxt != NULL)
685 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000686 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000687 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000689 if (ctxt != NULL) {
690 ctxt->wellFormed = 0;
691 if (ctxt->recovery == 0)
692 ctxt->disableSAX = 1;
693 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000694}
695
696/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000697 * xmlFatalErrMsgStrIntStr:
698 * @ctxt: an XML parser context
699 * @error: the error number
700 * @msg: the error message
701 * @str1: an string info
702 * @val: an integer value
703 * @str2: an string info
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800707static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000708xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800709 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000710 const xmlChar *str2)
711{
Daniel Veillard157fee02003-10-31 10:36:03 +0000712 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713 (ctxt->instate == XML_PARSER_EOF))
714 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000715 if (ctxt != NULL)
716 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000717 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000718 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719 NULL, 0, (const char *) str1, (const char *) str2,
720 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000721 if (ctxt != NULL) {
722 ctxt->wellFormed = 0;
723 if (ctxt->recovery == 0)
724 ctxt->disableSAX = 1;
725 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000726}
727
728/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000729 * xmlFatalErrMsgStr:
730 * @ctxt: an XML parser context
731 * @error: the error number
732 * @msg: the error message
733 * @val: a string value
734 *
735 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
736 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800737static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000738xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000739 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000740{
Daniel Veillard157fee02003-10-31 10:36:03 +0000741 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742 (ctxt->instate == XML_PARSER_EOF))
743 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000744 if (ctxt != NULL)
745 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000746 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000747 XML_FROM_PARSER, error, XML_ERR_FATAL,
748 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
749 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000750 if (ctxt != NULL) {
751 ctxt->wellFormed = 0;
752 if (ctxt->recovery == 0)
753 ctxt->disableSAX = 1;
754 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000755}
756
757/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000758 * xmlErrMsgStr:
759 * @ctxt: an XML parser context
760 * @error: the error number
761 * @msg: the error message
762 * @val: a string value
763 *
764 * Handle a non fatal parser error
765 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800766static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000767xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768 const char *msg, const xmlChar * val)
769{
Daniel Veillard157fee02003-10-31 10:36:03 +0000770 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771 (ctxt->instate == XML_PARSER_EOF))
772 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000773 if (ctxt != NULL)
774 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000775 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000776 XML_FROM_PARSER, error, XML_ERR_ERROR,
777 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
778 val);
779}
780
781/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000782 * xmlNsErr:
783 * @ctxt: an XML parser context
784 * @error: the error number
785 * @msg: the message
786 * @info1: extra information string
787 * @info2: extra information string
788 *
789 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
790 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800791static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000792xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
793 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000794 const xmlChar * info1, const xmlChar * info2,
795 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000796{
Daniel Veillard157fee02003-10-31 10:36:03 +0000797 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798 (ctxt->instate == XML_PARSER_EOF))
799 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000800 if (ctxt != NULL)
801 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000802 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000803 XML_ERR_ERROR, NULL, 0, (const char *) info1,
804 (const char *) info2, (const char *) info3, 0, 0, msg,
805 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000806 if (ctxt != NULL)
807 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000808}
809
Daniel Veillard37334572008-07-31 08:20:02 +0000810/**
811 * xmlNsWarn
812 * @ctxt: an XML parser context
813 * @error: the error number
814 * @msg: the message
815 * @info1: extra information string
816 * @info2: extra information string
817 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800818 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000819 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800820static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000821xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
822 const char *msg,
823 const xmlChar * info1, const xmlChar * info2,
824 const xmlChar * info3)
825{
826 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827 (ctxt->instate == XML_PARSER_EOF))
828 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000829 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830 XML_ERR_WARNING, NULL, 0, (const char *) info1,
831 (const char *) info2, (const char *) info3, 0, 0, msg,
832 info1, info2, info3);
833}
834
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000835/************************************************************************
836 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800837 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838 * *
839 ************************************************************************/
840
841/**
842 * xmlHasFeature:
843 * @feature: the feature to be examined
844 *
845 * Examines if the library has been compiled with a given feature.
846 *
847 * Returns a non-zero value if the feature exist, otherwise zero.
848 * Returns zero (0) if the feature does not exist or an unknown
849 * unknown feature is requested, non-zero otherwise.
850 */
851int
852xmlHasFeature(xmlFeature feature)
853{
854 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_THREAD_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_TREE_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_OUTPUT_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_PUSH_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_READER_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_PATTERN_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_WRITER_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_SAX1_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_FTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_HTTP_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_VALID_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_HTML_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_LEGACY_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_C14N_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef LIBXML_CATALOG_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_XPATH_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000951 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000952#ifdef LIBXML_XPTR_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000957 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000958#ifdef LIBXML_XINCLUDE_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000963 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000964#ifdef LIBXML_ICONV_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000969 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000970#ifdef LIBXML_ISO8859X_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000975 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000976#ifdef LIBXML_UNICODE_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000981 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000982#ifdef LIBXML_REGEXP_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000987 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000988#ifdef LIBXML_AUTOMATA_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000993 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000994#ifdef LIBXML_EXPR_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000999 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001000#ifdef LIBXML_SCHEMAS_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001005 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001006#ifdef LIBXML_SCHEMATRON_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001011 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012#ifdef LIBXML_MODULES_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001017 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001018#ifdef LIBXML_DEBUG_ENABLED
1019 return(1);
1020#else
1021 return(0);
1022#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001023 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001024#ifdef DEBUG_MEMORY_LOCATION
1025 return(1);
1026#else
1027 return(0);
1028#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001029 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001030#ifdef LIBXML_DEBUG_RUNTIME
1031 return(1);
1032#else
1033 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001034#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001035 case XML_WITH_ZLIB:
1036#ifdef LIBXML_ZLIB_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001041 case XML_WITH_LZMA:
1042#ifdef LIBXML_LZMA_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001047 case XML_WITH_ICU:
1048#ifdef LIBXML_ICU_ENABLED
1049 return(1);
1050#else
1051 return(0);
1052#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001053 default:
1054 break;
1055 }
1056 return(0);
1057}
1058
1059/************************************************************************
1060 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001061 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 * *
1063 ************************************************************************/
1064
1065/**
1066 * xmlDetectSAX2:
1067 * @ctxt: an XML parser context
1068 *
1069 * Do the SAX2 detection and specific intialization
1070 */
1071static void
1072xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001074#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001075 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076 ((ctxt->sax->startElementNs != NULL) ||
1077 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001078#else
1079 ctxt->sax2 = 1;
1080#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001081
1082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001085 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001087 xmlErrMemory(ctxt, NULL);
1088 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001089}
1090
Daniel Veillarde57ec792003-09-10 10:50:59 +00001091typedef struct _xmlDefAttrs xmlDefAttrs;
1092typedef xmlDefAttrs *xmlDefAttrsPtr;
1093struct _xmlDefAttrs {
1094 int nbAttrs; /* number of defaulted attributes on that element */
1095 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001096#if __STDC_VERSION__ >= 199901L
1097 /* Using a C99 flexible array member avoids UBSan errors. */
1098 const xmlChar *values[]; /* array of localname/prefix/values/external */
1099#else
1100 const xmlChar *values[5];
1101#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103
1104/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001105 * xmlAttrNormalizeSpace:
1106 * @src: the source string
1107 * @dst: the target string
1108 *
1109 * Normalize the space in non CDATA attribute values:
1110 * If the attribute type is not CDATA, then the XML processor MUST further
1111 * process the normalized attribute value by discarding any leading and
1112 * trailing space (#x20) characters, and by replacing sequences of space
1113 * (#x20) characters by a single space (#x20) character.
1114 * Note that the size of dst need to be at least src, and if one doesn't need
1115 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1116 * passing src as dst is just fine.
1117 *
1118 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1119 * is needed.
1120 */
1121static xmlChar *
1122xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1123{
1124 if ((src == NULL) || (dst == NULL))
1125 return(NULL);
1126
1127 while (*src == 0x20) src++;
1128 while (*src != 0) {
1129 if (*src == 0x20) {
1130 while (*src == 0x20) src++;
1131 if (*src != 0)
1132 *dst++ = 0x20;
1133 } else {
1134 *dst++ = *src++;
1135 }
1136 }
1137 *dst = 0;
1138 if (dst == src)
1139 return(NULL);
1140 return(dst);
1141}
1142
1143/**
1144 * xmlAttrNormalizeSpace2:
1145 * @src: the source string
1146 *
1147 * Normalize the space in non CDATA attribute values, a slightly more complex
1148 * front end to avoid allocation problems when running on attribute values
1149 * coming from the input.
1150 *
1151 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1152 * is needed.
1153 */
1154static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001155xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001156{
1157 int i;
1158 int remove_head = 0;
1159 int need_realloc = 0;
1160 const xmlChar *cur;
1161
1162 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1163 return(NULL);
1164 i = *len;
1165 if (i <= 0)
1166 return(NULL);
1167
1168 cur = src;
1169 while (*cur == 0x20) {
1170 cur++;
1171 remove_head++;
1172 }
1173 while (*cur != 0) {
1174 if (*cur == 0x20) {
1175 cur++;
1176 if ((*cur == 0x20) || (*cur == 0)) {
1177 need_realloc = 1;
1178 break;
1179 }
1180 } else
1181 cur++;
1182 }
1183 if (need_realloc) {
1184 xmlChar *ret;
1185
1186 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1187 if (ret == NULL) {
1188 xmlErrMemory(ctxt, NULL);
1189 return(NULL);
1190 }
1191 xmlAttrNormalizeSpace(ret, ret);
1192 *len = (int) strlen((const char *)ret);
1193 return(ret);
1194 } else if (remove_head) {
1195 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001196 memmove(src, src + remove_head, 1 + *len);
1197 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001198 }
1199 return(NULL);
1200}
1201
1202/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001203 * xmlAddDefAttrs:
1204 * @ctxt: an XML parser context
1205 * @fullname: the element fullname
1206 * @fullattr: the attribute fullname
1207 * @value: the attribute value
1208 *
1209 * Add a defaulted attribute for an element
1210 */
1211static void
1212xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213 const xmlChar *fullname,
1214 const xmlChar *fullattr,
1215 const xmlChar *value) {
1216 xmlDefAttrsPtr defaults;
1217 int len;
1218 const xmlChar *name;
1219 const xmlChar *prefix;
1220
Daniel Veillard6a31b832008-03-26 14:06:44 +00001221 /*
1222 * Allows to detect attribute redefinitions
1223 */
1224 if (ctxt->attsSpecial != NULL) {
1225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1226 return;
1227 }
1228
Daniel Veillarde57ec792003-09-10 10:50:59 +00001229 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001230 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001231 if (ctxt->attsDefault == NULL)
1232 goto mem_error;
1233 }
1234
1235 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001236 * split the element name into prefix:localname , the string found
1237 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001238 */
1239 name = xmlSplitQName3(fullname, &len);
1240 if (name == NULL) {
1241 name = xmlDictLookup(ctxt->dict, fullname, -1);
1242 prefix = NULL;
1243 } else {
1244 name = xmlDictLookup(ctxt->dict, name, -1);
1245 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1246 }
1247
1248 /*
1249 * make sure there is some storage
1250 */
1251 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1252 if (defaults == NULL) {
1253 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001254 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001255 if (defaults == NULL)
1256 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001257 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001258 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001259 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260 defaults, NULL) < 0) {
1261 xmlFree(defaults);
1262 goto mem_error;
1263 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001265 xmlDefAttrsPtr temp;
1266
1267 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001268 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001269 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001270 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001271 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001272 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001273 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274 defaults, NULL) < 0) {
1275 xmlFree(defaults);
1276 goto mem_error;
1277 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001278 }
1279
1280 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001281 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001282 * are within the DTD and hen not associated to namespace names.
1283 */
1284 name = xmlSplitQName3(fullattr, &len);
1285 if (name == NULL) {
1286 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1287 prefix = NULL;
1288 } else {
1289 name = xmlDictLookup(ctxt->dict, name, -1);
1290 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1291 }
1292
Daniel Veillardae0765b2008-07-31 19:54:59 +00001293 defaults->values[5 * defaults->nbAttrs] = name;
1294 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001295 /* intern the string and precompute the end */
1296 len = xmlStrlen(value);
1297 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001298 defaults->values[5 * defaults->nbAttrs + 2] = value;
1299 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1300 if (ctxt->external)
1301 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1302 else
1303 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001304 defaults->nbAttrs++;
1305
1306 return;
1307
1308mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001309 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001310 return;
1311}
1312
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001313/**
1314 * xmlAddSpecialAttr:
1315 * @ctxt: an XML parser context
1316 * @fullname: the element fullname
1317 * @fullattr: the attribute fullname
1318 * @type: the attribute type
1319 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001320 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001321 */
1322static void
1323xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324 const xmlChar *fullname,
1325 const xmlChar *fullattr,
1326 int type)
1327{
1328 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001329 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001330 if (ctxt->attsSpecial == NULL)
1331 goto mem_error;
1332 }
1333
Daniel Veillardac4118d2008-01-11 05:27:32 +00001334 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1335 return;
1336
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001337 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001339 return;
1340
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001343 return;
1344}
1345
Daniel Veillard4432df22003-09-28 18:58:27 +00001346/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001347 * xmlCleanSpecialAttrCallback:
1348 *
1349 * Removes CDATA attributes from the special attribute table
1350 */
1351static void
1352xmlCleanSpecialAttrCallback(void *payload, void *data,
1353 const xmlChar *fullname, const xmlChar *fullattr,
1354 const xmlChar *unused ATTRIBUTE_UNUSED) {
1355 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1356
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001357 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001358 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1359 }
1360}
1361
1362/**
1363 * xmlCleanSpecialAttr:
1364 * @ctxt: an XML parser context
1365 *
1366 * Trim the list of attributes defined to remove all those of type
1367 * CDATA as they are not special. This call should be done when finishing
1368 * to parse the DTD and before starting to parse the document root.
1369 */
1370static void
1371xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1372{
1373 if (ctxt->attsSpecial == NULL)
1374 return;
1375
1376 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1377
1378 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379 xmlHashFree(ctxt->attsSpecial, NULL);
1380 ctxt->attsSpecial = NULL;
1381 }
1382 return;
1383}
1384
1385/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001386 * xmlCheckLanguageID:
1387 * @lang: pointer to the string value
1388 *
1389 * Checks that the value conforms to the LanguageID production:
1390 *
1391 * NOTE: this is somewhat deprecated, those productions were removed from
1392 * the XML Second edition.
1393 *
1394 * [33] LanguageID ::= Langcode ('-' Subcode)*
1395 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1396 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1397 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1398 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1399 * [38] Subcode ::= ([a-z] | [A-Z])+
1400 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1402 *
1403 * http://www.rfc-editor.org/rfc/rfc5646.txt
1404 * langtag = language
1405 * ["-" script]
1406 * ["-" region]
1407 * *("-" variant)
1408 * *("-" extension)
1409 * ["-" privateuse]
1410 * language = 2*3ALPHA ; shortest ISO 639 code
1411 * ["-" extlang] ; sometimes followed by
1412 * ; extended language subtags
1413 * / 4ALPHA ; or reserved for future use
1414 * / 5*8ALPHA ; or registered language subtag
1415 *
1416 * extlang = 3ALPHA ; selected ISO 639 codes
1417 * *2("-" 3ALPHA) ; permanently reserved
1418 *
1419 * script = 4ALPHA ; ISO 15924 code
1420 *
1421 * region = 2ALPHA ; ISO 3166-1 code
1422 * / 3DIGIT ; UN M.49 code
1423 *
1424 * variant = 5*8alphanum ; registered variants
1425 * / (DIGIT 3alphanum)
1426 *
1427 * extension = singleton 1*("-" (2*8alphanum))
1428 *
1429 * ; Single alphanumerics
1430 * ; "x" reserved for private use
1431 * singleton = DIGIT ; 0 - 9
1432 * / %x41-57 ; A - W
1433 * / %x59-5A ; Y - Z
1434 * / %x61-77 ; a - w
1435 * / %x79-7A ; y - z
1436 *
1437 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1438 * The parser below doesn't try to cope with extension or privateuse
1439 * that could be added but that's not interoperable anyway
1440 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001441 * Returns 1 if correct 0 otherwise
1442 **/
1443int
1444xmlCheckLanguageID(const xmlChar * lang)
1445{
Daniel Veillard60587d62010-11-04 15:16:27 +01001446 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001447
1448 if (cur == NULL)
1449 return (0);
1450 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001451 ((cur[0] == 'I') && (cur[1] == '-')) ||
1452 ((cur[0] == 'x') && (cur[1] == '-')) ||
1453 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001454 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001455 * Still allow IANA code and user code which were coming
1456 * from the previous version of the XML-1.0 specification
1457 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001458 */
1459 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001460 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001461 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1462 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001463 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001464 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001465 nxt = cur;
1466 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1467 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1468 nxt++;
1469 if (nxt - cur >= 4) {
1470 /*
1471 * Reserved
1472 */
1473 if ((nxt - cur > 8) || (nxt[0] != 0))
1474 return(0);
1475 return(1);
1476 }
1477 if (nxt - cur < 2)
1478 return(0);
1479 /* we got an ISO 639 code */
1480 if (nxt[0] == 0)
1481 return(1);
1482 if (nxt[0] != '-')
1483 return(0);
1484
1485 nxt++;
1486 cur = nxt;
1487 /* now we can have extlang or script or region or variant */
1488 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1489 goto region_m49;
1490
1491 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1492 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1493 nxt++;
1494 if (nxt - cur == 4)
1495 goto script;
1496 if (nxt - cur == 2)
1497 goto region;
1498 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1499 goto variant;
1500 if (nxt - cur != 3)
1501 return(0);
1502 /* we parsed an extlang */
1503 if (nxt[0] == 0)
1504 return(1);
1505 if (nxt[0] != '-')
1506 return(0);
1507
1508 nxt++;
1509 cur = nxt;
1510 /* now we can have script or region or variant */
1511 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1512 goto region_m49;
1513
1514 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1516 nxt++;
1517 if (nxt - cur == 2)
1518 goto region;
1519 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1520 goto variant;
1521 if (nxt - cur != 4)
1522 return(0);
1523 /* we parsed a script */
1524script:
1525 if (nxt[0] == 0)
1526 return(1);
1527 if (nxt[0] != '-')
1528 return(0);
1529
1530 nxt++;
1531 cur = nxt;
1532 /* now we can have region or variant */
1533 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1534 goto region_m49;
1535
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538 nxt++;
1539
1540 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1541 goto variant;
1542 if (nxt - cur != 2)
1543 return(0);
1544 /* we parsed a region */
1545region:
1546 if (nxt[0] == 0)
1547 return(1);
1548 if (nxt[0] != '-')
1549 return(0);
1550
1551 nxt++;
1552 cur = nxt;
1553 /* now we can just have a variant */
1554 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1555 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1556 nxt++;
1557
1558 if ((nxt - cur < 5) || (nxt - cur > 8))
1559 return(0);
1560
1561 /* we parsed a variant */
1562variant:
1563 if (nxt[0] == 0)
1564 return(1);
1565 if (nxt[0] != '-')
1566 return(0);
1567 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001568 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001569
1570region_m49:
1571 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1572 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1573 nxt += 3;
1574 goto region;
1575 }
1576 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001577}
1578
Owen Taylor3473f882001-02-23 17:55:21 +00001579/************************************************************************
1580 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001581 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001582 * *
1583 ************************************************************************/
1584
Daniel Veillard8ed10722009-08-20 19:17:36 +02001585static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001587
Daniel Veillard0fb18932003-09-07 09:14:37 +00001588#ifdef SAX2
1589/**
1590 * nsPush:
1591 * @ctxt: an XML parser context
1592 * @prefix: the namespace prefix or NULL
1593 * @URL: the namespace name
1594 *
1595 * Pushes a new parser namespace on top of the ns stack
1596 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001597 * Returns -1 in case of error, -2 if the namespace should be discarded
1598 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001599 */
1600static int
1601nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1602{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001603 if (ctxt->options & XML_PARSE_NSCLEAN) {
1604 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001605 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001606 if (ctxt->nsTab[i] == prefix) {
1607 /* in scope */
1608 if (ctxt->nsTab[i + 1] == URL)
1609 return(-2);
1610 /* out of scope keep it */
1611 break;
1612 }
1613 }
1614 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001615 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1616 ctxt->nsMax = 10;
1617 ctxt->nsNr = 0;
1618 ctxt->nsTab = (const xmlChar **)
1619 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001621 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001622 ctxt->nsMax = 0;
1623 return (-1);
1624 }
1625 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001626 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001627 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001628 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1630 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001631 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001632 ctxt->nsMax /= 2;
1633 return (-1);
1634 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001635 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 }
1637 ctxt->nsTab[ctxt->nsNr++] = prefix;
1638 ctxt->nsTab[ctxt->nsNr++] = URL;
1639 return (ctxt->nsNr);
1640}
1641/**
1642 * nsPop:
1643 * @ctxt: an XML parser context
1644 * @nr: the number to pop
1645 *
1646 * Pops the top @nr parser prefix/namespace from the ns stack
1647 *
1648 * Returns the number of namespaces removed
1649 */
1650static int
1651nsPop(xmlParserCtxtPtr ctxt, int nr)
1652{
1653 int i;
1654
1655 if (ctxt->nsTab == NULL) return(0);
1656 if (ctxt->nsNr < nr) {
1657 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1658 nr = ctxt->nsNr;
1659 }
1660 if (ctxt->nsNr <= 0)
1661 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001662
Daniel Veillard0fb18932003-09-07 09:14:37 +00001663 for (i = 0;i < nr;i++) {
1664 ctxt->nsNr--;
1665 ctxt->nsTab[ctxt->nsNr] = NULL;
1666 }
1667 return(nr);
1668}
1669#endif
1670
1671static int
1672xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001674 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001675 int maxatts;
1676
1677 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001679 atts = (const xmlChar **)
1680 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001682 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001683 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001686 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 } else if (nr + 5 > ctxt->maxatts) {
1688 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001689 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001691 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001693 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694 (maxatts / 5) * sizeof(int));
1695 if (attallocs == NULL) goto mem_error;
1696 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001697 ctxt->maxatts = maxatts;
1698 }
1699 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001700mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001701 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001702 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001703}
1704
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001705/**
1706 * inputPush:
1707 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001708 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001709 *
1710 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001711 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001712 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001713 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001714int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1716{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001717 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001718 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001719 if (ctxt->inputNr >= ctxt->inputMax) {
1720 ctxt->inputMax *= 2;
1721 ctxt->inputTab =
1722 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1723 ctxt->inputMax *
1724 sizeof(ctxt->inputTab[0]));
1725 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001726 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001727 xmlFreeInputStream(value);
1728 ctxt->inputMax /= 2;
1729 value = NULL;
1730 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001731 }
1732 }
1733 ctxt->inputTab[ctxt->inputNr] = value;
1734 ctxt->input = value;
1735 return (ctxt->inputNr++);
1736}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001737/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001738 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001739 * @ctxt: an XML parser context
1740 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001742 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001743 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001744 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001745xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001746inputPop(xmlParserCtxtPtr ctxt)
1747{
1748 xmlParserInputPtr ret;
1749
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001750 if (ctxt == NULL)
1751 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001752 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001753 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001754 ctxt->inputNr--;
1755 if (ctxt->inputNr > 0)
1756 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1757 else
1758 ctxt->input = NULL;
1759 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001760 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001761 return (ret);
1762}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001763/**
1764 * nodePush:
1765 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001766 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001767 *
1768 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001769 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001770 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001771 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001772int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001773nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1774{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001775 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001776 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001777 xmlNodePtr *tmp;
1778
1779 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1780 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001781 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001782 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001783 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001784 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001785 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001786 ctxt->nodeTab = tmp;
1787 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001788 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001789 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001791 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001792 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001793 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001794 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001795 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001796 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001797 ctxt->nodeTab[ctxt->nodeNr] = value;
1798 ctxt->node = value;
1799 return (ctxt->nodeNr++);
1800}
Daniel Veillard8915c152008-08-26 13:05:34 +00001801
Daniel Veillard1c732d22002-11-30 11:22:59 +00001802/**
1803 * nodePop:
1804 * @ctxt: an XML parser context
1805 *
1806 * Pops the top element node from the node stack
1807 *
1808 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001809 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001810xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811nodePop(xmlParserCtxtPtr ctxt)
1812{
1813 xmlNodePtr ret;
1814
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001815 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001816 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001817 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818 ctxt->nodeNr--;
1819 if (ctxt->nodeNr > 0)
1820 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1821 else
1822 ctxt->node = NULL;
1823 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001824 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001825 return (ret);
1826}
Daniel Veillarda2351322004-06-27 12:08:10 +00001827
1828#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001829/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001830 * nameNsPush:
1831 * @ctxt: an XML parser context
1832 * @value: the element name
1833 * @prefix: the element prefix
1834 * @URI: the element namespace name
1835 *
1836 * Pushes a new element name/prefix/URL on top of the name stack
1837 *
1838 * Returns -1 in case of error, the index in the stack otherwise
1839 */
1840static int
1841nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1843{
1844 if (ctxt->nameNr >= ctxt->nameMax) {
1845 const xmlChar * *tmp;
1846 void **tmp2;
1847 ctxt->nameMax *= 2;
1848 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1849 ctxt->nameMax *
1850 sizeof(ctxt->nameTab[0]));
1851 if (tmp == NULL) {
1852 ctxt->nameMax /= 2;
1853 goto mem_error;
1854 }
1855 ctxt->nameTab = tmp;
1856 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1857 ctxt->nameMax * 3 *
1858 sizeof(ctxt->pushTab[0]));
1859 if (tmp2 == NULL) {
1860 ctxt->nameMax /= 2;
1861 goto mem_error;
1862 }
1863 ctxt->pushTab = tmp2;
1864 }
1865 ctxt->nameTab[ctxt->nameNr] = value;
1866 ctxt->name = value;
1867 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001869 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 return (ctxt->nameNr++);
1871mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001872 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873 return (-1);
1874}
1875/**
1876 * nameNsPop:
1877 * @ctxt: an XML parser context
1878 *
1879 * Pops the top element/prefix/URI name from the name stack
1880 *
1881 * Returns the name just removed
1882 */
1883static const xmlChar *
1884nameNsPop(xmlParserCtxtPtr ctxt)
1885{
1886 const xmlChar *ret;
1887
1888 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001889 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001890 ctxt->nameNr--;
1891 if (ctxt->nameNr > 0)
1892 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1893 else
1894 ctxt->name = NULL;
1895 ret = ctxt->nameTab[ctxt->nameNr];
1896 ctxt->nameTab[ctxt->nameNr] = NULL;
1897 return (ret);
1898}
Daniel Veillarda2351322004-06-27 12:08:10 +00001899#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001900
1901/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 * namePush:
1903 * @ctxt: an XML parser context
1904 * @value: the element name
1905 *
1906 * Pushes a new element name on top of the name stack
1907 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001908 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001910int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001911namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001912{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001913 if (ctxt == NULL) return (-1);
1914
Daniel Veillard1c732d22002-11-30 11:22:59 +00001915 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001916 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001917 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001918 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001919 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001920 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001921 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001922 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001923 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001924 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001925 }
1926 ctxt->nameTab[ctxt->nameNr] = value;
1927 ctxt->name = value;
1928 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001929mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001930 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001931 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001932}
1933/**
1934 * namePop:
1935 * @ctxt: an XML parser context
1936 *
1937 * Pops the top element name from the name stack
1938 *
1939 * Returns the name just removed
1940 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001941const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001942namePop(xmlParserCtxtPtr ctxt)
1943{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001944 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001945
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001946 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1947 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001948 ctxt->nameNr--;
1949 if (ctxt->nameNr > 0)
1950 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1951 else
1952 ctxt->name = NULL;
1953 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001954 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001955 return (ret);
1956}
Owen Taylor3473f882001-02-23 17:55:21 +00001957
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001958static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001959 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001960 int *tmp;
1961
Owen Taylor3473f882001-02-23 17:55:21 +00001962 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001963 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1964 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1965 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001966 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001967 ctxt->spaceMax /=2;
1968 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001969 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001970 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001971 }
1972 ctxt->spaceTab[ctxt->spaceNr] = val;
1973 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1974 return(ctxt->spaceNr++);
1975}
1976
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001977static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001978 int ret;
1979 if (ctxt->spaceNr <= 0) return(0);
1980 ctxt->spaceNr--;
1981 if (ctxt->spaceNr > 0)
1982 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1983 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001984 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001985 ret = ctxt->spaceTab[ctxt->spaceNr];
1986 ctxt->spaceTab[ctxt->spaceNr] = -1;
1987 return(ret);
1988}
1989
1990/*
1991 * Macros for accessing the content. Those should be used only by the parser,
1992 * and not exported.
1993 *
1994 * Dirty macros, i.e. one often need to make assumption on the context to
1995 * use them
1996 *
1997 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1998 * To be used with extreme caution since operations consuming
1999 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2004 * RAW same as CUR but in the input buffer, bypass any token
2005 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
2008 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00002009 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser. It takes no argument.
Owen Taylor3473f882001-02-23 17:55:21 +00002012 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2013 *
2014 * NEXT Skip to the next character, this does the proper decoding
2015 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00002016 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00002017 * CUR_CHAR(l) returns the current unicode character (int), set l
2018 * to the number of xmlChars used for the encoding [0-5].
2019 * CUR_SCHAR same but operate on a string instead of the context
2020 * COPY_BUF copy the current unicode char to the target buffer, increment
2021 * the index
2022 * GROW, SHRINK handling of input buffers
2023 */
2024
/*
 * Raw accessors on the current input. They all expect a variable named
 * ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s are equal to
 * c1 ... cn, compared as unsigned char. Bytes are tested left to right
 * and the test stops at the first mismatch. Every argument is
 * parenthesized so that expressions such as "ptr + 1" can be passed
 * safely, and the cast keeps a const qualifier on s.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2048
/*
 * SKIP(val): advance the input cursor, the column and ctxt->nbChars by
 * val, without any line accounting, then pull more data in if the cursor
 * landed on a 0 byte. val is evaluated several times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val bytes one at a time so that
 * line and column stay accurate across newlines. val is parenthesized in
 * the loop test so that an expression argument keeps its meaning; it is
 * evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2067
Daniel Veillarda880b122003-04-21 21:36:41 +00002068#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002069 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002071 xmlSHRINK (ctxt);
2072
2073static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002075 if (*ctxt->input->cur == 0)
2076 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2077}
Owen Taylor3473f882001-02-23 17:55:21 +00002078
Daniel Veillarda880b122003-04-21 21:36:41 +00002079#define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002081 xmlGROW (ctxt);
2082
2083static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2086
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002092 xmlHaltParser(ctxt);
2093 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002094 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2100 return;
2101 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002104}
Owen Taylor3473f882001-02-23 17:55:21 +00002105
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one byte, bumping column and ctxt->nbChars, then pull
 * more data in if the cursor landed on a 0 byte.
 * NOTE: this expands to a bare brace block rather than do { } while (0),
 * so "NEXT1;" must not be used as the unbraced body of an if that has an
 * else branch.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * keeping line and column up to date. l is parenthesized so that an
 * expression argument keeps its meaning.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* l and s are parenthesized before being used as operands */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i; a single byte is stored when l is 1, otherwise the
 * multi-byte form is written by xmlCopyCharMultiByte().
 * NOTE: this expands to an unwrapped if/else statement, so it must not be
 * used as the unbraced body of another if. Arguments are parenthesized,
 * but i is evaluated more than once and must be a plain lvalue.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002131
2132/**
2133 * xmlSkipBlankChars:
2134 * @ctxt: the XML parser context
2135 *
2136 * skip all blanks character found at that point in the input streams.
2137 * It pops up finished entities in the process if allowable at that point.
2138 *
2139 * Returns the number of space chars skipped
2140 */
2141
2142int
2143xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002144 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002145
2146 /*
2147 * It's Okay to use CUR/NEXT here since all the blanks are on
2148 * the ASCII range.
2149 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002150 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2151 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002152 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002153 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002154 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002155 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002156 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002157 if (*cur == '\n') {
2158 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002159 } else {
2160 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002161 }
2162 cur++;
2163 res++;
2164 if (*cur == 0) {
2165 ctxt->input->cur = cur;
2166 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167 cur = ctxt->input->cur;
2168 }
2169 }
2170 ctxt->input->cur = cur;
2171 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002172 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2173
2174 while (1) {
2175 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002176 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002177 } else if (CUR == '%') {
2178 /*
2179 * Need to handle support of entities branching here
2180 */
2181 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2182 break;
2183 xmlParsePEReference(ctxt);
2184 } else if (CUR == 0) {
2185 if (ctxt->inputNr <= 1)
2186 break;
2187 xmlPopInput(ctxt);
2188 } else {
2189 break;
2190 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002191
2192 /*
2193 * Also increase the counter when entering or exiting a PERef.
2194 * The spec says: "When a parameter-entity reference is recognized
2195 * in the DTD and included, its replacement text MUST be enlarged
2196 * by the attachment of one leading and one following space (#x20)
2197 * character."
2198 */
2199 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002200 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002201 }
Owen Taylor3473f882001-02-23 17:55:21 +00002202 return(res);
2203}
2204
2205/************************************************************************
2206 * *
2207 * Commodity functions to handle entities *
2208 * *
2209 ************************************************************************/
2210
2211/**
2212 * xmlPopInput:
2213 * @ctxt: an XML parser context
2214 *
2215 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216 * pop it and return the next char.
2217 *
2218 * Returns the current xmlChar in the parser context
2219 */
2220xmlChar
2221xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002222 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002223 if (xmlParserDebugEntities)
2224 xmlGenericError(xmlGenericErrorContext,
2225 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002226 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227 (ctxt->instate != XML_PARSER_EOF))
2228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002230 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002231 if (*ctxt->input->cur == 0)
2232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002233 return(CUR);
2234}
2235
2236/**
2237 * xmlPushInput:
2238 * @ctxt: an XML parser context
2239 * @input: an XML parser input fragment (entity, XML fragment ...).
2240 *
2241 * xmlPushInput: switch to a new input stream which is stacked on top
2242 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002243 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002244 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002245int
Owen Taylor3473f882001-02-23 17:55:21 +00002246xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002247 int ret;
2248 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002249
2250 if (xmlParserDebugEntities) {
2251 if ((ctxt->input != NULL) && (ctxt->input->filename))
2252 xmlGenericError(xmlGenericErrorContext,
2253 "%s(%d): ", ctxt->input->filename,
2254 ctxt->input->line);
2255 xmlGenericError(xmlGenericErrorContext,
2256 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2257 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002258 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259 (ctxt->inputNr > 1024)) {
2260 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2261 while (ctxt->inputNr > 1)
2262 xmlFreeInputStream(inputPop(ctxt));
2263 return(-1);
2264 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002265 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002266 if (ctxt->instate == XML_PARSER_EOF)
2267 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002268 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002269 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002270}
2271
2272/**
2273 * xmlParseCharRef:
2274 * @ctxt: an XML parser context
2275 *
2276 * parse Reference declarations
2277 *
2278 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279 * '&#x' [0-9a-fA-F]+ ';'
2280 *
2281 * [ WFC: Legal Character ]
2282 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002283 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002284 *
2285 * Returns the value parsed (as an int), 0 in case of error
2286 */
2287int
2288xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002289 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002290 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002291 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002292
Owen Taylor3473f882001-02-23 17:55:21 +00002293 /*
2294 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2295 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002296 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002297 (NXT(2) == 'x')) {
2298 SKIP(3);
2299 GROW;
2300 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002301 if (count++ > 20) {
2302 count = 0;
2303 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002304 if (ctxt->instate == XML_PARSER_EOF)
2305 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002306 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002307 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002308 val = val * 16 + (CUR - '0');
2309 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310 val = val * 16 + (CUR - 'a') + 10;
2311 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312 val = val * 16 + (CUR - 'A') + 10;
2313 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002314 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002315 val = 0;
2316 break;
2317 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002318 if (val > 0x10FFFF)
2319 outofrange = val;
2320
Owen Taylor3473f882001-02-23 17:55:21 +00002321 NEXT;
2322 count++;
2323 }
2324 if (RAW == ';') {
2325 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002326 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002327 ctxt->nbChars ++;
2328 ctxt->input->cur++;
2329 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002330 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002331 SKIP(2);
2332 GROW;
2333 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002334 if (count++ > 20) {
2335 count = 0;
2336 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002337 if (ctxt->instate == XML_PARSER_EOF)
2338 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002339 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002340 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002341 val = val * 10 + (CUR - '0');
2342 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002343 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002344 val = 0;
2345 break;
2346 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002347 if (val > 0x10FFFF)
2348 outofrange = val;
2349
Owen Taylor3473f882001-02-23 17:55:21 +00002350 NEXT;
2351 count++;
2352 }
2353 if (RAW == ';') {
2354 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002355 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002356 ctxt->nbChars ++;
2357 ctxt->input->cur++;
2358 }
2359 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 }
2362
2363 /*
2364 * [ WFC: Legal Character ]
2365 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002366 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002367 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002368 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002369 return(val);
2370 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002371 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372 "xmlParseCharRef: invalid xmlChar value %d\n",
2373 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002374 }
2375 return(0);
2376}
2377
2378/**
2379 * xmlParseStringCharRef:
2380 * @ctxt: an XML parser context
2381 * @str: a pointer to an index in the string
2382 *
2383 * parse Reference declarations, variant parsing from a string rather
2384 * than an an input flow.
2385 *
2386 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387 * '&#x' [0-9a-fA-F]+ ';'
2388 *
2389 * [ WFC: Legal Character ]
2390 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002391 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002392 *
2393 * Returns the value parsed (as an int), 0 in case of error, str will be
2394 * updated to the current value of the index
2395 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002396static int
Owen Taylor3473f882001-02-23 17:55:21 +00002397xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2398 const xmlChar *ptr;
2399 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002400 unsigned int val = 0;
2401 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002402
2403 if ((str == NULL) || (*str == NULL)) return(0);
2404 ptr = *str;
2405 cur = *ptr;
2406 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2407 ptr += 3;
2408 cur = *ptr;
2409 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002410 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002411 val = val * 16 + (cur - '0');
2412 else if ((cur >= 'a') && (cur <= 'f'))
2413 val = val * 16 + (cur - 'a') + 10;
2414 else if ((cur >= 'A') && (cur <= 'F'))
2415 val = val * 16 + (cur - 'A') + 10;
2416 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002417 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002418 val = 0;
2419 break;
2420 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002421 if (val > 0x10FFFF)
2422 outofrange = val;
2423
Owen Taylor3473f882001-02-23 17:55:21 +00002424 ptr++;
2425 cur = *ptr;
2426 }
2427 if (cur == ';')
2428 ptr++;
2429 } else if ((cur == '&') && (ptr[1] == '#')){
2430 ptr += 2;
2431 cur = *ptr;
2432 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002433 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002434 val = val * 10 + (cur - '0');
2435 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002436 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002437 val = 0;
2438 break;
2439 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002440 if (val > 0x10FFFF)
2441 outofrange = val;
2442
Owen Taylor3473f882001-02-23 17:55:21 +00002443 ptr++;
2444 cur = *ptr;
2445 }
2446 if (cur == ';')
2447 ptr++;
2448 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002449 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002450 return(0);
2451 }
2452 *str = ptr;
2453
2454 /*
2455 * [ WFC: Legal Character ]
2456 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002457 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002458 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002459 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002460 return(val);
2461 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002462 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2464 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002465 }
2466 return(0);
2467}
2468
2469/**
2470 * xmlParserHandlePEReference:
2471 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002472 *
Owen Taylor3473f882001-02-23 17:55:21 +00002473 * [69] PEReference ::= '%' Name ';'
2474 *
2475 * [ WFC: No Recursion ]
2476 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002477 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *
2479 * [ WFC: Entity Declared ]
2480 * In a document without any DTD, a document with only an internal DTD
2481 * subset which contains no parameter entity references, or a document
2482 * with "standalone='yes'", ... ... The declaration of a parameter
2483 * entity must precede any reference to it...
2484 *
2485 * [ VC: Entity Declared ]
2486 * In a document with an external subset or external parameter entities
2487 * with "standalone='no'", ... ... The declaration of a parameter entity
2488 * must precede any reference to it...
2489 *
2490 * [ WFC: In DTD ]
2491 * Parameter-entity references may only appear in the DTD.
2492 * NOTE: misleading but this is handled.
2493 *
2494 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002495 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002496 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002497 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002498 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002499 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002500 */
2501void
2502xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002503 switch(ctxt->instate) {
2504 case XML_PARSER_CDATA_SECTION:
2505 return;
2506 case XML_PARSER_COMMENT:
2507 return;
2508 case XML_PARSER_START_TAG:
2509 return;
2510 case XML_PARSER_END_TAG:
2511 return;
2512 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002513 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002514 return;
2515 case XML_PARSER_PROLOG:
2516 case XML_PARSER_START:
2517 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002518 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002519 return;
2520 case XML_PARSER_ENTITY_DECL:
2521 case XML_PARSER_CONTENT:
2522 case XML_PARSER_ATTRIBUTE_VALUE:
2523 case XML_PARSER_PI:
2524 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002525 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002526 /* we just ignore it there */
2527 return;
2528 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002529 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002530 return;
2531 case XML_PARSER_ENTITY_VALUE:
2532 /*
2533 * NOTE: in the case of entity values, we don't do the
2534 * substitution here since we need the literal
2535 * entity value to be able to save the internal
2536 * subset of the document.
2537 * This will be handled by xmlStringDecodeEntities
2538 */
2539 return;
2540 case XML_PARSER_DTD:
2541 /*
2542 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543 * In the internal DTD subset, parameter-entity references
2544 * can occur only where markup declarations can occur, not
2545 * within markup declarations.
2546 * In that case this is handled in xmlParseMarkupDecl
2547 */
2548 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2549 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002550 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002551 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002552 break;
2553 case XML_PARSER_IGNORE:
2554 return;
2555 }
2556
Nick Wellnhofer03904152017-06-05 21:16:00 +02002557 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002558}
2559
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus @n bytes.
 * buffer##_size is expected to be a size_t holding the current size
 * mem_error: is expected to be a label in the calling function that
 *            handles memory allocation failures; it is also reached when
 *            the new size wraps around. On failure the buffer and its
 *            size are left unchanged.
 * @n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2574
2575/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002576 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002577 * @ctxt: the parser context
2578 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002579 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002580 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2581 * @end: an end marker xmlChar, 0 if none
2582 * @end2: an end marker xmlChar, 0 if none
2583 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002584 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002585 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002586 *
2587 * [67] Reference ::= EntityRef | CharRef
2588 *
2589 * [69] PEReference ::= '%' Name ';'
2590 *
2591 * Returns A newly allocated string with the substitution done. The caller
2592 * must deallocate it !
2593 */
2594xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002595xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2596 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002597 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002598 size_t buffer_size = 0;
2599 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002600
2601 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002602 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002603 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002604 xmlEntityPtr ent;
2605 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002606
Daniel Veillarda82b1822004-11-08 16:24:57 +00002607 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002608 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002609 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002610
Daniel Veillard0161e632008-08-28 15:36:32 +00002611 if (((ctxt->depth > 40) &&
2612 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2613 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002614 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002615 return(NULL);
2616 }
2617
2618 /*
2619 * allocate a translation buffer.
2620 */
2621 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002622 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002623 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002624
2625 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002626 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002627 * we are operating on already parsed values.
2628 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002629 if (str < last)
2630 c = CUR_SCHAR(str, l);
2631 else
2632 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002633 while ((c != 0) && (c != end) && /* non input consuming loop */
2634 (c != end2) && (c != end3)) {
2635
2636 if (c == 0) break;
2637 if ((c == '&') && (str[1] == '#')) {
2638 int val = xmlParseStringCharRef(ctxt, &str);
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002639 if (val == 0)
2640 goto int_error;
2641 COPY_BUF(0,buffer,nbchars,val);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002642 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002643 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002644 }
Owen Taylor3473f882001-02-23 17:55:21 +00002645 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2646 if (xmlParserDebugEntities)
2647 xmlGenericError(xmlGenericErrorContext,
2648 "String decoding Entity Reference: %.30s\n",
2649 str);
2650 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002651 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002652 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002653 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002654 if ((ent != NULL) &&
2655 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2656 if (ent->content != NULL) {
2657 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002658 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002659 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002660 }
Owen Taylor3473f882001-02-23 17:55:21 +00002661 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002662 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2663 "predefined entity has no content\n");
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002664 goto int_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002665 }
2666 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002667 ctxt->depth++;
2668 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2669 0, 0, 0);
2670 ctxt->depth--;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002671 if (rep == NULL)
2672 goto int_error;
Daniel Veillard0161e632008-08-28 15:36:32 +00002673
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002674 current = rep;
2675 while (*current != 0) { /* non input consuming loop */
2676 buffer[nbchars++] = *current++;
2677 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2679 goto int_error;
2680 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2681 }
2682 }
2683 xmlFree(rep);
2684 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002685 } else if (ent != NULL) {
2686 int i = xmlStrlen(ent->name);
2687 const xmlChar *cur = ent->name;
2688
2689 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002690 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002691 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002692 }
2693 for (;i > 0;i--)
2694 buffer[nbchars++] = *cur++;
2695 buffer[nbchars++] = ';';
2696 }
2697 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2698 if (xmlParserDebugEntities)
2699 xmlGenericError(xmlGenericErrorContext,
2700 "String decoding PE Reference: %.30s\n", str);
2701 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002702 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002703 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002704 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002705 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002706 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002707 /*
2708 * Note: external parsed entities will not be loaded,
2709 * it is not required for a non-validating parser to
2710 * complete external PEreferences coming from the
2711 * internal subset
2712 */
2713 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2714 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2715 (ctxt->validate != 0)) {
2716 xmlLoadEntityContent(ctxt, ent);
2717 } else {
2718 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2719 "not validating will not read content for PE entity %s\n",
2720 ent->name, NULL);
2721 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002722 }
Owen Taylor3473f882001-02-23 17:55:21 +00002723 ctxt->depth++;
2724 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2725 0, 0, 0);
2726 ctxt->depth--;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02002727 if (rep == NULL)
2728 goto int_error;
2729 current = rep;
2730 while (*current != 0) { /* non input consuming loop */
2731 buffer[nbchars++] = *current++;
2732 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2733 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2734 goto int_error;
2735 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2736 }
2737 }
2738 xmlFree(rep);
2739 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002740 }
2741 } else {
2742 COPY_BUF(l,buffer,nbchars,c);
2743 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002744 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2745 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002746 }
2747 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002748 if (str < last)
2749 c = CUR_SCHAR(str, l);
2750 else
2751 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002752 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002753 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002754 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002755
2756mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002757 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002758int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002759 if (rep != NULL)
2760 xmlFree(rep);
2761 if (buffer != NULL)
2762 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002763 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002764}
2765
Daniel Veillarde57ec792003-09-10 10:50:59 +00002766/**
2767 * xmlStringDecodeEntities:
2768 * @ctxt: the parser context
2769 * @str: the input string
2770 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2771 * @end: an end marker xmlChar, 0 if none
2772 * @end2: an end marker xmlChar, 0 if none
2773 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002774 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002775 * Takes a entity string content and process to do the adequate substitutions.
2776 *
2777 * [67] Reference ::= EntityRef | CharRef
2778 *
2779 * [69] PEReference ::= '%' Name ';'
2780 *
2781 * Returns A newly allocated string with the substitution done. The caller
2782 * must deallocate it !
2783 */
2784xmlChar *
2785xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2786 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002787 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002788 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2789 end, end2, end3));
2790}
Owen Taylor3473f882001-02-23 17:55:21 +00002791
2792/************************************************************************
2793 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002794 * Commodity functions, cleanup needed ? *
2795 * *
2796 ************************************************************************/
2797
2798/**
2799 * areBlanks:
2800 * @ctxt: an XML parser context
2801 * @str: a xmlChar *
2802 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002803 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002804 *
2805 * Is this a sequence of blank chars that one can ignore ?
2806 *
2807 * Returns 1 if ignorable 0 otherwise.
2808 */
2809
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002810static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2811 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002812 int i, ret;
2813 xmlNodePtr lastChild;
2814
Daniel Veillard05c13a22001-09-09 08:38:09 +00002815 /*
2816 * Don't spend time trying to differentiate them, the same callback is
2817 * used !
2818 */
2819 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002820 return(0);
2821
Owen Taylor3473f882001-02-23 17:55:21 +00002822 /*
2823 * Check for xml:space value.
2824 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002825 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2826 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002827 return(0);
2828
2829 /*
2830 * Check that the string is made of blanks
2831 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002832 if (blank_chars == 0) {
2833 for (i = 0;i < len;i++)
2834 if (!(IS_BLANK_CH(str[i]))) return(0);
2835 }
Owen Taylor3473f882001-02-23 17:55:21 +00002836
2837 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002838 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002839 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002840 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002841 if (ctxt->myDoc != NULL) {
2842 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2843 if (ret == 0) return(1);
2844 if (ret == 1) return(0);
2845 }
2846
2847 /*
2848 * Otherwise, heuristic :-\
2849 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002850 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002851 if ((ctxt->node->children == NULL) &&
2852 (RAW == '<') && (NXT(1) == '/')) return(0);
2853
2854 lastChild = xmlGetLastChild(ctxt->node);
2855 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002856 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2857 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002858 } else if (xmlNodeIsText(lastChild))
2859 return(0);
2860 else if ((ctxt->node->children != NULL) &&
2861 (xmlNodeIsText(ctxt->node->children)))
2862 return(0);
2863 return(1);
2864}
2865
Owen Taylor3473f882001-02-23 17:55:21 +00002866/************************************************************************
2867 * *
2868 * Extra stuff for namespace support *
2869 * Relates to http://www.w3.org/TR/WD-xml-names *
2870 * *
2871 ************************************************************************/
2872
2873/**
2874 * xmlSplitQName:
2875 * @ctxt: an XML parser context
2876 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002877 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002878 *
2879 * parse an UTF8 encoded XML qualified name string
2880 *
2881 * [NS 5] QName ::= (Prefix ':')? LocalPart
2882 *
2883 * [NS 6] Prefix ::= NCName
2884 *
2885 * [NS 7] LocalPart ::= NCName
2886 *
2887 * Returns the local part, and prefix is updated
2888 * to get the Prefix if any.
2889 */
2890
2891xmlChar *
2892xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2893 xmlChar buf[XML_MAX_NAMELEN + 5];
2894 xmlChar *buffer = NULL;
2895 int len = 0;
2896 int max = XML_MAX_NAMELEN;
2897 xmlChar *ret = NULL;
2898 const xmlChar *cur = name;
2899 int c;
2900
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002901 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002902 *prefix = NULL;
2903
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002904 if (cur == NULL) return(NULL);
2905
Owen Taylor3473f882001-02-23 17:55:21 +00002906#ifndef XML_XML_NAMESPACE
2907 /* xml: prefix is not really a namespace */
2908 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2909 (cur[2] == 'l') && (cur[3] == ':'))
2910 return(xmlStrdup(name));
2911#endif
2912
Daniel Veillard597bc482003-07-24 16:08:28 +00002913 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002914 if (cur[0] == ':')
2915 return(xmlStrdup(name));
2916
2917 c = *cur++;
2918 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2919 buf[len++] = c;
2920 c = *cur++;
2921 }
2922 if (len >= max) {
2923 /*
2924 * Okay someone managed to make a huge name, so he's ready to pay
2925 * for the processing speed.
2926 */
2927 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002928
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002929 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002930 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002931 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002932 return(NULL);
2933 }
2934 memcpy(buffer, buf, len);
2935 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2936 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002937 xmlChar *tmp;
2938
Owen Taylor3473f882001-02-23 17:55:21 +00002939 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002940 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002941 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002942 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002943 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002944 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002945 return(NULL);
2946 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002948 }
2949 buffer[len++] = c;
2950 c = *cur++;
2951 }
2952 buffer[len] = 0;
2953 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002954
Daniel Veillard597bc482003-07-24 16:08:28 +00002955 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002956 if (buffer != NULL)
2957 xmlFree(buffer);
2958 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002959 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002960 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002961
Owen Taylor3473f882001-02-23 17:55:21 +00002962 if (buffer == NULL)
2963 ret = xmlStrndup(buf, len);
2964 else {
2965 ret = buffer;
2966 buffer = NULL;
2967 max = XML_MAX_NAMELEN;
2968 }
2969
2970
2971 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002972 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002973 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002974 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002975 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002976 }
Owen Taylor3473f882001-02-23 17:55:21 +00002977 len = 0;
2978
Daniel Veillardbb284f42002-10-16 18:02:47 +00002979 /*
2980 * Check that the first character is proper to start
2981 * a new name
2982 */
2983 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2984 ((c >= 0x41) && (c <= 0x5A)) ||
2985 (c == '_') || (c == ':'))) {
2986 int l;
2987 int first = CUR_SCHAR(cur, l);
2988
2989 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002990 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002991 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002992 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002993 }
2994 }
2995 cur++;
2996
Owen Taylor3473f882001-02-23 17:55:21 +00002997 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2998 buf[len++] = c;
2999 c = *cur++;
3000 }
3001 if (len >= max) {
3002 /*
3003 * Okay someone managed to make a huge name, so he's ready to pay
3004 * for the processing speed.
3005 */
3006 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003007
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003008 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003009 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003010 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003011 return(NULL);
3012 }
3013 memcpy(buffer, buf, len);
3014 while (c != 0) { /* tested bigname2.xml */
3015 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003016 xmlChar *tmp;
3017
Owen Taylor3473f882001-02-23 17:55:21 +00003018 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003020 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003021 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003022 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003023 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003024 return(NULL);
3025 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003026 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003027 }
3028 buffer[len++] = c;
3029 c = *cur++;
3030 }
3031 buffer[len] = 0;
3032 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003033
Owen Taylor3473f882001-02-23 17:55:21 +00003034 if (buffer == NULL)
3035 ret = xmlStrndup(buf, len);
3036 else {
3037 ret = buffer;
3038 }
3039 }
3040
3041 return(ret);
3042}
3043
3044/************************************************************************
3045 * *
3046 * The parser itself *
3047 * Relates to http://www.w3.org/TR/REC-xml *
3048 * *
3049 ************************************************************************/
3050
Daniel Veillard34e3f642008-07-29 09:02:27 +00003051/************************************************************************
3052 * *
3053 * Routines to parse Name, NCName and NmToken *
3054 * *
3055 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Each one is incremented
 * on entry to the name parsing routine it is named after.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3064
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * introduced in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with the pre-revision-5 parsing semantics
 * if the new XML_PARSE_OLD10 option is given to the parser.
 */
3075static int
3076xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3077 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3078 /*
3079 * Use the new checks of production [4] [4a] amd [5] of the
3080 * Update 5 of XML-1.0
3081 */
3082 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3083 (((c >= 'a') && (c <= 'z')) ||
3084 ((c >= 'A') && (c <= 'Z')) ||
3085 (c == '_') || (c == ':') ||
3086 ((c >= 0xC0) && (c <= 0xD6)) ||
3087 ((c >= 0xD8) && (c <= 0xF6)) ||
3088 ((c >= 0xF8) && (c <= 0x2FF)) ||
3089 ((c >= 0x370) && (c <= 0x37D)) ||
3090 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3091 ((c >= 0x200C) && (c <= 0x200D)) ||
3092 ((c >= 0x2070) && (c <= 0x218F)) ||
3093 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3094 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3095 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3096 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3097 ((c >= 0x10000) && (c <= 0xEFFFF))))
3098 return(1);
3099 } else {
3100 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3101 return(1);
3102 }
3103 return(0);
3104}
3105
3106static int
3107xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3108 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3109 /*
3110 * Use the new checks of production [4] [4a] amd [5] of the
3111 * Update 5 of XML-1.0
3112 */
3113 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3114 (((c >= 'a') && (c <= 'z')) ||
3115 ((c >= 'A') && (c <= 'Z')) ||
3116 ((c >= '0') && (c <= '9')) || /* !start */
3117 (c == '_') || (c == ':') ||
3118 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3119 ((c >= 0xC0) && (c <= 0xD6)) ||
3120 ((c >= 0xD8) && (c <= 0xF6)) ||
3121 ((c >= 0xF8) && (c <= 0x2FF)) ||
3122 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3123 ((c >= 0x370) && (c <= 0x37D)) ||
3124 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3125 ((c >= 0x200C) && (c <= 0x200D)) ||
3126 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3127 ((c >= 0x2070) && (c <= 0x218F)) ||
3128 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3129 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3130 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3131 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3132 ((c >= 0x10000) && (c <= 0xEFFFF))))
3133 return(1);
3134 } else {
3135 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3136 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003137 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003138 (IS_COMBINING(c)) ||
3139 (IS_EXTENDER(c)))
3140 return(1);
3141 }
3142 return(0);
3143}
3144
Daniel Veillarde57ec792003-09-10 10:50:59 +00003145static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003146 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003147
Daniel Veillard34e3f642008-07-29 09:02:27 +00003148static const xmlChar *
3149xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3150 int len = 0, l;
3151 int c;
3152 int count = 0;
3153
Daniel Veillardc6561462009-03-25 10:22:31 +00003154#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003155 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003156#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003157
3158 /*
3159 * Handler for more complex cases
3160 */
3161 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003162 if (ctxt->instate == XML_PARSER_EOF)
3163 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003164 c = CUR_CHAR(l);
3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166 /*
3167 * Use the new checks of production [4] [4a] amd [5] of the
3168 * Update 5 of XML-1.0
3169 */
3170 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3171 (!(((c >= 'a') && (c <= 'z')) ||
3172 ((c >= 'A') && (c <= 'Z')) ||
3173 (c == '_') || (c == ':') ||
3174 ((c >= 0xC0) && (c <= 0xD6)) ||
3175 ((c >= 0xD8) && (c <= 0xF6)) ||
3176 ((c >= 0xF8) && (c <= 0x2FF)) ||
3177 ((c >= 0x370) && (c <= 0x37D)) ||
3178 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 ((c >= 0x200C) && (c <= 0x200D)) ||
3180 ((c >= 0x2070) && (c <= 0x218F)) ||
3181 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3183 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3184 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3186 return(NULL);
3187 }
3188 len += l;
3189 NEXTL(l);
3190 c = CUR_CHAR(l);
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192 (((c >= 'a') && (c <= 'z')) ||
3193 ((c >= 'A') && (c <= 'Z')) ||
3194 ((c >= '0') && (c <= '9')) || /* !start */
3195 (c == '_') || (c == ':') ||
3196 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3197 ((c >= 0xC0) && (c <= 0xD6)) ||
3198 ((c >= 0xD8) && (c <= 0xF6)) ||
3199 ((c >= 0xF8) && (c <= 0x2FF)) ||
3200 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3201 ((c >= 0x370) && (c <= 0x37D)) ||
3202 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203 ((c >= 0x200C) && (c <= 0x200D)) ||
3204 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3205 ((c >= 0x2070) && (c <= 0x218F)) ||
3206 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3207 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3208 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3209 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3210 ((c >= 0x10000) && (c <= 0xEFFFF))
3211 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003212 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003213 count = 0;
3214 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003215 if (ctxt->instate == XML_PARSER_EOF)
3216 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003217 }
3218 len += l;
3219 NEXTL(l);
3220 c = CUR_CHAR(l);
3221 }
3222 } else {
3223 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224 (!IS_LETTER(c) && (c != '_') &&
3225 (c != ':'))) {
3226 return(NULL);
3227 }
3228 len += l;
3229 NEXTL(l);
3230 c = CUR_CHAR(l);
3231
3232 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3233 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3234 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003235 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003236 (IS_COMBINING(c)) ||
3237 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003238 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003239 count = 0;
3240 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003241 if (ctxt->instate == XML_PARSER_EOF)
3242 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003243 }
3244 len += l;
3245 NEXTL(l);
3246 c = CUR_CHAR(l);
3247 }
3248 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003249 if ((len > XML_MAX_NAME_LENGTH) &&
3250 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3251 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3252 return(NULL);
3253 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003254 if (ctxt->input->cur - ctxt->input->base < len) {
3255 /*
3256 * There were a couple of bugs where PERefs lead to to a change
3257 * of the buffer. Check the buffer size to avoid passing an invalid
3258 * pointer to xmlDictLookup.
3259 */
3260 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3261 "unexpected change of input buffer");
3262 return (NULL);
3263 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003264 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3265 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3266 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3267}
3268
Owen Taylor3473f882001-02-23 17:55:21 +00003269/**
3270 * xmlParseName:
3271 * @ctxt: an XML parser context
3272 *
3273 * parse an XML name.
3274 *
3275 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3276 * CombiningChar | Extender
3277 *
3278 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3279 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003280 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003281 *
3282 * Returns the Name parsed or NULL
3283 */
3284
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003285const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003286xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003287 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003288 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003289 int count = 0;
3290
3291 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003292
Daniel Veillardc6561462009-03-25 10:22:31 +00003293#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003294 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003295#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003296
Daniel Veillard48b2f892001-02-25 16:11:03 +00003297 /*
3298 * Accelerator for simple ASCII names
3299 */
3300 in = ctxt->input->cur;
3301 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3302 ((*in >= 0x41) && (*in <= 0x5A)) ||
3303 (*in == '_') || (*in == ':')) {
3304 in++;
3305 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3306 ((*in >= 0x41) && (*in <= 0x5A)) ||
3307 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003308 (*in == '_') || (*in == '-') ||
3309 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003310 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003311 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003312 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003313 if ((count > XML_MAX_NAME_LENGTH) &&
3314 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316 return(NULL);
3317 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003318 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003319 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003320 ctxt->nbChars += count;
3321 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003322 if (ret == NULL)
3323 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003324 return(ret);
3325 }
3326 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003328 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003329}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003330
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331static const xmlChar *
3332xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3333 int len = 0, l;
3334 int c;
3335 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003336 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003337
Daniel Veillardc6561462009-03-25 10:22:31 +00003338#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003339 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003340#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003341
3342 /*
3343 * Handler for more complex cases
3344 */
3345 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003346 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003347 c = CUR_CHAR(l);
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3350 return(NULL);
3351 }
3352
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3354 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003355 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003356 if ((len > XML_MAX_NAME_LENGTH) &&
3357 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3358 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3359 return(NULL);
3360 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003361 count = 0;
3362 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003363 if (ctxt->instate == XML_PARSER_EOF)
3364 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003365 }
3366 len += l;
3367 NEXTL(l);
3368 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003369 if (c == 0) {
3370 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003371 /*
3372 * when shrinking to extend the buffer we really need to preserve
3373 * the part of the name we already parsed. Hence rolling back
3374 * by current lenght.
3375 */
3376 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003377 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003378 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003379 if (ctxt->instate == XML_PARSER_EOF)
3380 return(NULL);
3381 c = CUR_CHAR(l);
3382 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003384 if ((len > XML_MAX_NAME_LENGTH) &&
3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3387 return(NULL);
3388 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003389 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003390}
3391
3392/**
3393 * xmlParseNCName:
3394 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003395 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003396 *
3397 * parse an XML name.
3398 *
3399 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3400 * CombiningChar | Extender
3401 *
3402 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3403 *
3404 * Returns the Name parsed or NULL
3405 */
3406
3407static const xmlChar *
3408xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003409 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003410 const xmlChar *ret;
3411 int count = 0;
3412
Daniel Veillardc6561462009-03-25 10:22:31 +00003413#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003414 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003415#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003416
3417 /*
3418 * Accelerator for simple ASCII names
3419 */
3420 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003421 e = ctxt->input->end;
3422 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3423 ((*in >= 0x41) && (*in <= 0x5A)) ||
3424 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003425 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003426 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3427 ((*in >= 0x41) && (*in <= 0x5A)) ||
3428 ((*in >= 0x30) && (*in <= 0x39)) ||
3429 (*in == '_') || (*in == '-') ||
3430 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003431 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003432 if (in >= e)
3433 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003434 if ((*in > 0) && (*in < 0x80)) {
3435 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003436 if ((count > XML_MAX_NAME_LENGTH) &&
3437 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3438 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3439 return(NULL);
3440 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003441 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3442 ctxt->input->cur = in;
3443 ctxt->nbChars += count;
3444 ctxt->input->col += count;
3445 if (ret == NULL) {
3446 xmlErrMemory(ctxt, NULL);
3447 }
3448 return(ret);
3449 }
3450 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003451complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003452 return(xmlParseNCNameComplex(ctxt));
3453}
3454
Daniel Veillard46de64e2002-05-29 08:21:33 +00003455/**
3456 * xmlParseNameAndCompare:
3457 * @ctxt: an XML parser context
3458 *
3459 * parse an XML name and compares for match
3460 * (specialized for endtag parsing)
3461 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003462 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3463 * and the name for mismatch
3464 */
3465
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003466static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003467xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003468 register const xmlChar *cmp = other;
3469 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003470 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003471
3472 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003473 if (ctxt->instate == XML_PARSER_EOF)
3474 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003475
Daniel Veillard46de64e2002-05-29 08:21:33 +00003476 in = ctxt->input->cur;
3477 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003479 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003480 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003481 }
William M. Brack76e95df2003-10-18 16:20:14 +00003482 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003483 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003484 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003485 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003486 }
3487 /* failure (or end of input buffer), check with full function */
3488 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003489 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003490 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003491 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003492 }
3493 return ret;
3494}
3495
Owen Taylor3473f882001-02-23 17:55:21 +00003496/**
3497 * xmlParseStringName:
3498 * @ctxt: an XML parser context
3499 * @str: a pointer to the string pointer (IN/OUT)
3500 *
3501 * parse an XML name.
3502 *
3503 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3504 * CombiningChar | Extender
3505 *
3506 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3507 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003508 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003509 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003510 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003511 * is updated to the current location in the string.
3512 */
3513
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003514static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003515xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3516 xmlChar buf[XML_MAX_NAMELEN + 5];
3517 const xmlChar *cur = *str;
3518 int len = 0, l;
3519 int c;
3520
Daniel Veillardc6561462009-03-25 10:22:31 +00003521#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003522 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003523#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003524
Owen Taylor3473f882001-02-23 17:55:21 +00003525 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003526 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003527 return(NULL);
3528 }
3529
Daniel Veillard34e3f642008-07-29 09:02:27 +00003530 COPY_BUF(l,buf,len,c);
3531 cur += l;
3532 c = CUR_SCHAR(cur, l);
3533 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003534 COPY_BUF(l,buf,len,c);
3535 cur += l;
3536 c = CUR_SCHAR(cur, l);
3537 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3538 /*
3539 * Okay someone managed to make a huge name, so he's ready to pay
3540 * for the processing speed.
3541 */
3542 xmlChar *buffer;
3543 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003544
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003545 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003546 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003547 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003548 return(NULL);
3549 }
3550 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003551 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003552 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003553 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003554
3555 if ((len > XML_MAX_NAME_LENGTH) &&
3556 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3557 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3558 xmlFree(buffer);
3559 return(NULL);
3560 }
Owen Taylor3473f882001-02-23 17:55:21 +00003561 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003562 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003563 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003564 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003565 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003566 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003567 return(NULL);
3568 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003569 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003570 }
3571 COPY_BUF(l,buffer,len,c);
3572 cur += l;
3573 c = CUR_SCHAR(cur, l);
3574 }
3575 buffer[len] = 0;
3576 *str = cur;
3577 return(buffer);
3578 }
3579 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003580 if ((len > XML_MAX_NAME_LENGTH) &&
3581 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3582 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3583 return(NULL);
3584 }
Owen Taylor3473f882001-02-23 17:55:21 +00003585 *str = cur;
3586 return(xmlStrndup(buf, len));
3587}
3588
3589/**
3590 * xmlParseNmtoken:
3591 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003592 *
Owen Taylor3473f882001-02-23 17:55:21 +00003593 * parse an XML Nmtoken.
3594 *
3595 * [7] Nmtoken ::= (NameChar)+
3596 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003597 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003598 *
3599 * Returns the Nmtoken parsed or NULL
3600 */
3601
3602xmlChar *
3603xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3604 xmlChar buf[XML_MAX_NAMELEN + 5];
3605 int len = 0, l;
3606 int c;
3607 int count = 0;
3608
Daniel Veillardc6561462009-03-25 10:22:31 +00003609#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003610 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003611#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003612
Owen Taylor3473f882001-02-23 17:55:21 +00003613 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003614 if (ctxt->instate == XML_PARSER_EOF)
3615 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003616 c = CUR_CHAR(l);
3617
Daniel Veillard34e3f642008-07-29 09:02:27 +00003618 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003619 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003620 count = 0;
3621 GROW;
3622 }
3623 COPY_BUF(l,buf,len,c);
3624 NEXTL(l);
3625 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003626 if (c == 0) {
3627 count = 0;
3628 GROW;
3629 if (ctxt->instate == XML_PARSER_EOF)
3630 return(NULL);
3631 c = CUR_CHAR(l);
3632 }
Owen Taylor3473f882001-02-23 17:55:21 +00003633 if (len >= XML_MAX_NAMELEN) {
3634 /*
3635 * Okay someone managed to make a huge token, so he's ready to pay
3636 * for the processing speed.
3637 */
3638 xmlChar *buffer;
3639 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003640
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003641 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003642 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003643 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003644 return(NULL);
3645 }
3646 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003647 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003648 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003649 count = 0;
3650 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003651 if (ctxt->instate == XML_PARSER_EOF) {
3652 xmlFree(buffer);
3653 return(NULL);
3654 }
Owen Taylor3473f882001-02-23 17:55:21 +00003655 }
3656 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003657 xmlChar *tmp;
3658
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003659 if ((max > XML_MAX_NAME_LENGTH) &&
3660 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3661 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3662 xmlFree(buffer);
3663 return(NULL);
3664 }
Owen Taylor3473f882001-02-23 17:55:21 +00003665 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003666 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003667 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003668 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003669 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003670 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003671 return(NULL);
3672 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003673 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003674 }
3675 COPY_BUF(l,buffer,len,c);
3676 NEXTL(l);
3677 c = CUR_CHAR(l);
3678 }
3679 buffer[len] = 0;
3680 return(buffer);
3681 }
3682 }
3683 if (len == 0)
3684 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003685 if ((len > XML_MAX_NAME_LENGTH) &&
3686 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3687 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3688 return(NULL);
3689 }
Owen Taylor3473f882001-02-23 17:55:21 +00003690 return(xmlStrndup(buf, len));
3691}
3692
3693/**
3694 * xmlParseEntityValue:
3695 * @ctxt: an XML parser context
3696 * @orig: if non-NULL store a copy of the original entity value
3697 *
3698 * parse a value for ENTITY declarations
3699 *
3700 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3701 * "'" ([^%&'] | PEReference | Reference)* "'"
3702 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003703 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003704 */
3705
3706xmlChar *
3707xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3708 xmlChar *buf = NULL;
3709 int len = 0;
3710 int size = XML_PARSER_BUFFER_SIZE;
3711 int c, l;
3712 xmlChar stop;
3713 xmlChar *ret = NULL;
3714 const xmlChar *cur = NULL;
3715 xmlParserInputPtr input;
3716
3717 if (RAW == '"') stop = '"';
3718 else if (RAW == '\'') stop = '\'';
3719 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003720 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003721 return(NULL);
3722 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003723 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003724 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003725 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003726 return(NULL);
3727 }
3728
3729 /*
3730 * The content of the entity definition is copied in a buffer.
3731 */
3732
3733 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3734 input = ctxt->input;
3735 GROW;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003736 if (ctxt->instate == XML_PARSER_EOF)
3737 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003738 NEXT;
3739 c = CUR_CHAR(l);
3740 /*
3741 * NOTE: 4.4.5 Included in Literal
3742 * When a parameter entity reference appears in a literal entity
3743 * value, ... a single or double quote character in the replacement
3744 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003745 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003746 * In practice it means we stop the loop only when back at parsing
3747 * the initial entity and the quote is found
3748 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003749 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3750 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003751 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003752 xmlChar *tmp;
3753
Owen Taylor3473f882001-02-23 17:55:21 +00003754 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003755 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3756 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003757 xmlErrMemory(ctxt, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003758 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003759 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003760 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 }
3762 COPY_BUF(l,buf,len,c);
3763 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003764
3765 GROW;
3766 c = CUR_CHAR(l);
3767 if (c == 0) {
3768 GROW;
3769 c = CUR_CHAR(l);
3770 }
3771 }
3772 buf[len] = 0;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003773 if (ctxt->instate == XML_PARSER_EOF)
3774 goto error;
3775 if (c != stop) {
3776 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3777 goto error;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003778 }
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003779 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00003780
3781 /*
3782 * Raise problem w.r.t. '&' and '%' being used in non-entities
3783 * reference constructs. Note Charref will be handled in
3784 * xmlStringDecodeEntities()
3785 */
3786 cur = buf;
3787 while (*cur != 0) { /* non input consuming */
3788 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3789 xmlChar *name;
3790 xmlChar tmp = *cur;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003791 int nameOk = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003792
3793 cur++;
3794 name = xmlParseStringName(ctxt, &cur);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003795 if (name != NULL) {
3796 nameOk = 1;
3797 xmlFree(name);
3798 }
3799 if ((nameOk == 0) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003800 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003801 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003802 tmp);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003803 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003804 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003805 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3806 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003807 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003808 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003809 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003810 if (*cur == 0)
3811 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003812 }
3813 cur++;
3814 }
3815
3816 /*
3817 * Then PEReference entities are substituted.
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003818 *
3819 * NOTE: 4.4.7 Bypassed
3820 * When a general entity reference appears in the EntityValue in
3821 * an entity declaration, it is bypassed and left as is.
3822 * so XML_SUBSTITUTE_REF is not set here.
Owen Taylor3473f882001-02-23 17:55:21 +00003823 */
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003824 ++ctxt->depth;
3825 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3826 0, 0, 0);
3827 --ctxt->depth;
3828 if (orig != NULL) {
3829 *orig = buf;
3830 buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003832
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003833error:
3834 if (buf != NULL)
3835 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003836 return(ret);
3837}
3838
/**
 * xmlParseAttValueComplex:
 * @ctxt:  an XML parser context
 * @attlen:  if non-NULL, receives the byte length of the returned value
 * @normalize:  whether to apply the inner normalization
 *
 * parse a value for an attribute, this is the fallback function
 * of xmlParseAttValue() when the attribute parsing requires handling
 * of non-ASCII characters, or normalization compaction.
 *
 * The value is accumulated in a heap buffer (buf / buf_size). Whitespace
 * characters (#x20, #xD, #xA, #x9) are stored as #x20. When @normalize is
 * non-zero, leading whitespace is skipped, consecutive whitespace is
 * stored as a single #x20 and trailing #x20 are removed.
 *
 * A '<' in the value, an invalid character or a missing closing quote is
 * reported as a fatal error, but the value collected so far is still
 * returned. NULL is returned when the value does not start with a quote,
 * when the parser reached XML_PARSER_EOF, or when one of the paths
 * jumping to mem_error is taken.
 *
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
 */
static xmlChar *
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
    xmlChar limit = 0;
    xmlChar *buf = NULL;
    xmlChar *rep = NULL;
    size_t len = 0;
    size_t buf_size = 0;
    int c, l, in_space = 0;
    xmlChar *current = NULL;
    xmlEntityPtr ent;

    /* Remember which quote opened the value: only that one closes it. */
    if (NXT(0) == '"') {
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        limit = '"';
        NEXT;
    } else if (NXT(0) == '\'') {
        limit = '\'';
        ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
        NEXT;
    } else {
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     *
     * NOTE(review): growBuffer() is a macro defined outside this function.
     * buf_size is never reassigned in the visible code, so the macro
     * presumably enlarges buf, updates buf_size and jumps to mem_error on
     * failure -- confirm against its definition before changing the names
     * buf / buf_size or the mem_error label.
     */
    buf_size = XML_PARSER_BUFFER_SIZE;
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
    if (buf == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     */
    c = CUR_CHAR(l);
    while (((NXT(0) != limit) && /* checked */
            (IS_CHAR(c)) && (c != '<')) &&
            (ctxt->instate != XML_PARSER_EOF)) {
        /*
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
         * special option is given
         *
         * NOTE(review): this exit goes through mem_error, so
         * xmlErrMemory() is called as well after the length message --
         * confirm that this is intended.
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue length too long\n");
            goto mem_error;
        }
        if (c == 0) break;
        if (c == '&') {
            in_space = 0;
            if (NXT(1) == '#') {
                /* character reference */
                int val = xmlParseCharRef(ctxt);

                if (val == '&') {
                    if (ctxt->replaceEntities) {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                    } else {
                        /*
                         * The reparsing will be done in xmlStringGetNodeList()
                         * called by the attribute() function in SAX.c
                         */
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    }
                } else if (val != 0) {
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    len += xmlCopyChar(0, &buf[len], val);
                }
            } else {
                /* general entity reference */
                ent = xmlParseEntityRef(ctxt);
                ctxt->nbentities++;
                if (ent != NULL)
                    ctxt->nbentities += ent->owner;
                if ((ent != NULL) &&
                    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                    /*
                     * Predefined entity: store its single character, except
                     * that '&' is kept as "&#38;" when entities are not
                     * being replaced.
                     */
                    if (len + 10 > buf_size) {
                        growBuffer(buf, 10);
                    }
                    if ((ctxt->replaceEntities == 0) &&
                        (ent->content[0] == '&')) {
                        buf[len++] = '&';
                        buf[len++] = '#';
                        buf[len++] = '3';
                        buf[len++] = '8';
                        buf[len++] = ';';
                    } else {
                        buf[len++] = ent->content[0];
                    }
                } else if ((ent != NULL) &&
                           (ctxt->replaceEntities != 0)) {
                    if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
                        /*
                         * Substitute the entity: decode its content and
                         * append it, turning #xD, #xA and #x9 into #x20.
                         */
                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                      XML_SUBSTITUTE_REF,
                                                      0, 0, 0);
                        --ctxt->depth;
                        if (rep != NULL) {
                            current = rep;
                            while (*current != 0) { /* non input consuming */
                                /*
                                 * The byte is stored before the headroom
                                 * test; this relies on the room left by the
                                 * previous "len + 10 > buf_size" checks.
                                 */
                                if ((*current == 0xD) || (*current == 0xA) ||
                                    (*current == 0x9)) {
                                    buf[len++] = 0x20;
                                    current++;
                                } else
                                    buf[len++] = *current++;
                                if (len + 10 > buf_size) {
                                    growBuffer(buf, 10);
                                }
                            }
                            xmlFree(rep);
                            rep = NULL;
                        }
                    } else {
                        if (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                        if (ent->content != NULL)
                            buf[len++] = ent->content[0];
                    }
                } else if (ent != NULL) {
                    int i = xmlStrlen(ent->name);
                    const xmlChar *cur = ent->name;

                    /*
                     * This may look absurd but is needed to detect
                     * entities problems
                     *
                     * The content is decoded once (while ent->checked is
                     * still 0) only for its side effects: ent->checked is
                     * computed from the growth of ctxt->nbentities, its low
                     * bit is set when the decoded text contains '<', and the
                     * content is truncated to an empty string when decoding
                     * fails.
                     */
                    if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
                        (ent->content != NULL) && (ent->checked == 0)) {
                        unsigned long oldnbent = ctxt->nbentities;

                        ++ctxt->depth;
                        rep = xmlStringDecodeEntities(ctxt, ent->content,
                                                  XML_SUBSTITUTE_REF, 0, 0, 0);
                        --ctxt->depth;

                        ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
                        if (rep != NULL) {
                            if (xmlStrchr(rep, '<'))
                                ent->checked |= 1;
                            xmlFree(rep);
                            rep = NULL;
                        } else {
                            ent->content[0] = 0;
                        }
                    }

                    /*
                     * Just output the reference
                     * ('&' is stored before the headroom test, see above).
                     */
                    buf[len++] = '&';
                    while (len + i + 10 > buf_size) {
                        growBuffer(buf, i + 10);
                    }
                    for (;i > 0;i--)
                        buf[len++] = *cur++;
                    buf[len++] = ';';
                }
            }
        } else {
            if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
                /*
                 * Whitespace is stored as #x20. With @normalize set, it is
                 * dropped at the start of the value (len == 0) and when the
                 * previous character was already whitespace (in_space).
                 */
                if ((len != 0) || (!normalize)) {
                    if ((!normalize) || (!in_space)) {
                        COPY_BUF(l,buf,len,0x20);
                        while (len + 10 > buf_size) {
                            growBuffer(buf, 10);
                        }
                    }
                    in_space = 1;
                }
            } else {
                in_space = 0;
                COPY_BUF(l,buf,len,c);
                if (len + 10 > buf_size) {
                    growBuffer(buf, 10);
                }
            }
            NEXTL(l);
        }
        GROW;
        c = CUR_CHAR(l);
    }
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;

    /* With @normalize set, strip the trailing spaces. */
    if ((in_space) && (normalize)) {
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
    }
    buf[len] = 0;
    /*
     * Report why the loop stopped. These errors do not discard the value:
     * buf is still returned below.
     */
    if (RAW == '<') {
        xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
    } else if (RAW != limit) {
        if ((c != 0) && (!IS_CHAR(c))) {
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
                           "invalid character in attribute value\n");
        } else {
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                           "AttValue: ' expected\n");
        }
    } else
        NEXT;

    /*
     * There we potentially risk an overflow, don't allow attribute value of
     * length more than INT_MAX it is a very reasonable assumption !
     * (len is a size_t but is reported through the int *attlen.)
     *
     * NOTE(review): as for the limit check in the loop, this exit also
     * calls xmlErrMemory() through mem_error -- confirm that is intended.
     */
    if (len >= INT_MAX) {
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
                       "AttValue length too long\n");
        goto mem_error;
    }

    if (attlen != NULL) *attlen = (int) len;
    return(buf);

mem_error:
    xmlErrMemory(ctxt, NULL);
error:
    /* common cleanup: release the output buffer and any pending decode */
    if (buf != NULL)
        xmlFree(buf);
    if (rep != NULL)
        xmlFree(rep);
    return(NULL);
}
4087
4088/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004089 * xmlParseAttValue:
4090 * @ctxt: an XML parser context
4091 *
4092 * parse a value for an attribute
4093 * Note: the parser won't do substitution of entities here, this
4094 * will be handled later in xmlStringGetNodeList
4095 *
4096 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4097 * "'" ([^<&'] | Reference)* "'"
4098 *
4099 * 3.3.3 Attribute-Value Normalization:
4100 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004101 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004102 * - a character reference is processed by appending the referenced
4103 * character to the attribute value
4104 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004105 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004106 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4107 * appending #x20 to the normalized value, except that only a single
4108 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004109 * parsed entity or the literal entity value of an internal parsed entity
4110 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004111 * If the declared value is not CDATA, then the XML processor must further
4112 * process the normalized attribute value by discarding any leading and
4113 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004114 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004115 * All attributes for which no declaration has been read should be treated
4116 * by a non-validating parser as if declared CDATA.
4117 *
4118 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4119 */
4120
4121
4122xmlChar *
4123xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004124 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004125 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004126}
4127
4128/**
Owen Taylor3473f882001-02-23 17:55:21 +00004129 * xmlParseSystemLiteral:
4130 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004131 *
Owen Taylor3473f882001-02-23 17:55:21 +00004132 * parse an XML Literal
4133 *
4134 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4135 *
4136 * Returns the SystemLiteral parsed or NULL
4137 */
4138
4139xmlChar *
4140xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4141 xmlChar *buf = NULL;
4142 int len = 0;
4143 int size = XML_PARSER_BUFFER_SIZE;
4144 int cur, l;
4145 xmlChar stop;
4146 int state = ctxt->instate;
4147 int count = 0;
4148
4149 SHRINK;
4150 if (RAW == '"') {
4151 NEXT;
4152 stop = '"';
4153 } else if (RAW == '\'') {
4154 NEXT;
4155 stop = '\'';
4156 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004157 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004158 return(NULL);
4159 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004160
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004161 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004162 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004163 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004164 return(NULL);
4165 }
4166 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4167 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004168 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004169 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004170 xmlChar *tmp;
4171
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004172 if ((size > XML_MAX_NAME_LENGTH) &&
4173 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4174 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4175 xmlFree(buf);
4176 ctxt->instate = (xmlParserInputState) state;
4177 return(NULL);
4178 }
Owen Taylor3473f882001-02-23 17:55:21 +00004179 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004180 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4181 if (tmp == NULL) {
4182 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004183 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 ctxt->instate = (xmlParserInputState) state;
4185 return(NULL);
4186 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004187 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004188 }
4189 count++;
4190 if (count > 50) {
4191 GROW;
4192 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004193 if (ctxt->instate == XML_PARSER_EOF) {
4194 xmlFree(buf);
4195 return(NULL);
4196 }
Owen Taylor3473f882001-02-23 17:55:21 +00004197 }
4198 COPY_BUF(l,buf,len,cur);
4199 NEXTL(l);
4200 cur = CUR_CHAR(l);
4201 if (cur == 0) {
4202 GROW;
4203 SHRINK;
4204 cur = CUR_CHAR(l);
4205 }
4206 }
4207 buf[len] = 0;
4208 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004209 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004210 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004211 } else {
4212 NEXT;
4213 }
4214 return(buf);
4215}
4216
4217/**
4218 * xmlParsePubidLiteral:
4219 * @ctxt: an XML parser context
4220 *
4221 * parse an XML public literal
4222 *
4223 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4224 *
4225 * Returns the PubidLiteral parsed or NULL.
4226 */
4227
4228xmlChar *
4229xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4230 xmlChar *buf = NULL;
4231 int len = 0;
4232 int size = XML_PARSER_BUFFER_SIZE;
4233 xmlChar cur;
4234 xmlChar stop;
4235 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004236 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004237
4238 SHRINK;
4239 if (RAW == '"') {
4240 NEXT;
4241 stop = '"';
4242 } else if (RAW == '\'') {
4243 NEXT;
4244 stop = '\'';
4245 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004246 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 return(NULL);
4248 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004249 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004250 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004251 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004252 return(NULL);
4253 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004254 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004255 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004256 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004257 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004258 xmlChar *tmp;
4259
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004260 if ((size > XML_MAX_NAME_LENGTH) &&
4261 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4262 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4263 xmlFree(buf);
4264 return(NULL);
4265 }
Owen Taylor3473f882001-02-23 17:55:21 +00004266 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004267 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4268 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004269 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004270 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004271 return(NULL);
4272 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004273 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004274 }
4275 buf[len++] = cur;
4276 count++;
4277 if (count > 50) {
4278 GROW;
4279 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004280 if (ctxt->instate == XML_PARSER_EOF) {
4281 xmlFree(buf);
4282 return(NULL);
4283 }
Owen Taylor3473f882001-02-23 17:55:21 +00004284 }
4285 NEXT;
4286 cur = CUR;
4287 if (cur == 0) {
4288 GROW;
4289 SHRINK;
4290 cur = CUR;
4291 }
4292 }
4293 buf[len] = 0;
4294 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004295 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004296 } else {
4297 NEXT;
4298 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004299 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004300 return(buf);
4301}
4302
Daniel Veillard8ed10722009-08-20 19:17:36 +02004303static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004304
/*
 * used for the test in the inner loop of the char data testing
 *
 * Table indexed by byte value. An entry is either the byte value itself
 * or 0x00. The zero entries are: the control characters 0x00-0x1F except
 * TAB (0x09) -- so LF (0x0A) and CR (0x0D) are zero too --, '&' (0x26),
 * '<' (0x3C), ']' (0x5D) and every non-ASCII byte (0x80-0xFF).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF (non-ascii): not listed, implicitly initialized to 0x00 */
};
4342
Owen Taylor3473f882001-02-23 17:55:21 +00004343/**
4344 * xmlParseCharData:
4345 * @ctxt: an XML parser context
4346 * @cdata: int indicating whether we are within a CDATA section
4347 *
4348 * parse a CharData section.
4349 * if we are within a CDATA section ']]>' marks an end of section.
4350 *
4351 * The right angle bracket (>) may be represented using the string "&gt;",
4352 * and must, for compatibility, be escaped using "&gt;" or a character
4353 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004354 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004355 *
4356 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4357 */
4358
4359void
4360xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004361 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004362 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004363 int line = ctxt->input->line;
4364 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004365 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004366
4367 SHRINK;
4368 GROW;
4369 /*
4370 * Accelerated common case where input don't need to be
4371 * modified before passing it to the handler.
4372 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004373 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004374 in = ctxt->input->cur;
4375 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004376get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004377 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004378 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004379 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004380 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004381 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004382 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004383 goto get_more_space;
4384 }
4385 if (*in == '<') {
4386 nbchar = in - ctxt->input->cur;
4387 if (nbchar > 0) {
4388 const xmlChar *tmp = ctxt->input->cur;
4389 ctxt->input->cur = in;
4390
Daniel Veillard34099b42004-11-04 17:34:35 +00004391 if ((ctxt->sax != NULL) &&
4392 (ctxt->sax->ignorableWhitespace !=
4393 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004394 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004395 if (ctxt->sax->ignorableWhitespace != NULL)
4396 ctxt->sax->ignorableWhitespace(ctxt->userData,
4397 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004398 } else {
4399 if (ctxt->sax->characters != NULL)
4400 ctxt->sax->characters(ctxt->userData,
4401 tmp, nbchar);
4402 if (*ctxt->space == -1)
4403 *ctxt->space = -2;
4404 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004405 } else if ((ctxt->sax != NULL) &&
4406 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004407 ctxt->sax->characters(ctxt->userData,
4408 tmp, nbchar);
4409 }
4410 }
4411 return;
4412 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004413
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004414get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004415 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004416 while (test_char_data[*in]) {
4417 in++;
4418 ccol++;
4419 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004420 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004421 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004422 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004423 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004424 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004425 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004426 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004427 }
4428 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004429 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Nick Wellnhofer52ceced2017-07-01 17:49:30 +02004431 ctxt->input->cur = in + 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004432 return;
4433 }
4434 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004435 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004436 goto get_more;
4437 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004438 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004439 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004440 if ((ctxt->sax != NULL) &&
4441 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004442 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004443 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004444 const xmlChar *tmp = ctxt->input->cur;
4445 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004446
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004447 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004448 if (ctxt->sax->ignorableWhitespace != NULL)
4449 ctxt->sax->ignorableWhitespace(ctxt->userData,
4450 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004451 } else {
4452 if (ctxt->sax->characters != NULL)
4453 ctxt->sax->characters(ctxt->userData,
4454 tmp, nbchar);
4455 if (*ctxt->space == -1)
4456 *ctxt->space = -2;
4457 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004458 line = ctxt->input->line;
4459 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004460 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004461 if (ctxt->sax->characters != NULL)
4462 ctxt->sax->characters(ctxt->userData,
4463 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004464 line = ctxt->input->line;
4465 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004466 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004467 /* something really bad happened in the SAX callback */
4468 if (ctxt->instate != XML_PARSER_CONTENT)
4469 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004470 }
4471 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004472 if (*in == 0xD) {
4473 in++;
4474 if (*in == 0xA) {
4475 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004476 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004477 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004478 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004479 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004480 in--;
4481 }
4482 if (*in == '<') {
4483 return;
4484 }
4485 if (*in == '&') {
4486 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004487 }
4488 SHRINK;
4489 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004490 if (ctxt->instate == XML_PARSER_EOF)
4491 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004492 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004493 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004494 nbchar = 0;
4495 }
Daniel Veillard50582112001-03-26 22:52:16 +00004496 ctxt->input->line = line;
4497 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004498 xmlParseCharDataComplex(ctxt, cdata);
4499}
4500
Daniel Veillard01c13b52002-12-10 15:19:08 +00004501/**
4502 * xmlParseCharDataComplex:
4503 * @ctxt: an XML parser context
4504 * @cdata: int indicating whether we are within a CDATA section
4505 *
4506 * parse a CharData section.this is the fallback function
4507 * of xmlParseCharData() when the parsing requires handling
4508 * of non-ASCII characters.
4509 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004510static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004511xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004512 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4513 int nbchar = 0;
4514 int cur, l;
4515 int count = 0;
4516
4517 SHRINK;
4518 GROW;
4519 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004520 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004521 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004522 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004523 if ((cur == ']') && (NXT(1) == ']') &&
4524 (NXT(2) == '>')) {
4525 if (cdata) break;
4526 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004527 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004528 }
4529 }
4530 COPY_BUF(l,buf,nbchar,cur);
4531 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004532 buf[nbchar] = 0;
4533
Owen Taylor3473f882001-02-23 17:55:21 +00004534 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004535 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004536 */
4537 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004538 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004539 if (ctxt->sax->ignorableWhitespace != NULL)
4540 ctxt->sax->ignorableWhitespace(ctxt->userData,
4541 buf, nbchar);
4542 } else {
4543 if (ctxt->sax->characters != NULL)
4544 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004545 if ((ctxt->sax->characters !=
4546 ctxt->sax->ignorableWhitespace) &&
4547 (*ctxt->space == -1))
4548 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004549 }
4550 }
4551 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004552 /* something really bad happened in the SAX callback */
4553 if (ctxt->instate != XML_PARSER_CONTENT)
4554 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004555 }
4556 count++;
4557 if (count > 50) {
4558 GROW;
4559 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004560 if (ctxt->instate == XML_PARSER_EOF)
4561 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004562 }
4563 NEXTL(l);
4564 cur = CUR_CHAR(l);
4565 }
4566 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004567 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004568 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004569 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004570 */
4571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004572 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004573 if (ctxt->sax->ignorableWhitespace != NULL)
4574 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4575 } else {
4576 if (ctxt->sax->characters != NULL)
4577 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004578 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4579 (*ctxt->space == -1))
4580 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004581 }
4582 }
4583 }
Nick Wellnhofer69936b12017-08-30 14:16:01 +02004584 if ((cur != 0) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004585 /* Generate the error and skip the offending character */
4586 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4587 "PCDATA invalid Char value %d\n",
4588 cur);
4589 NEXTL(l);
4590 }
Owen Taylor3473f882001-02-23 17:55:21 +00004591}
4592
4593/**
4594 * xmlParseExternalID:
4595 * @ctxt: an XML parser context
4596 * @publicID: a xmlChar** receiving PubidLiteral
4597 * @strict: indicate whether we should restrict parsing to only
4598 * production [75], see NOTE below
4599 *
4600 * Parse an External ID or a Public ID
4601 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004602 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004603 * 'PUBLIC' S PubidLiteral S SystemLiteral
4604 *
4605 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4606 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4607 *
4608 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4609 *
4610 * Returns the function returns SystemLiteral and in the second
4611 * case publicID receives PubidLiteral, is strict is off
4612 * it is possible to return NULL and have publicID set.
4613 */
4614
4615xmlChar *
4616xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4617 xmlChar *URI = NULL;
4618
4619 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004620
4621 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004622 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004623 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004624 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4626 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004627 }
Owen Taylor3473f882001-02-23 17:55:21 +00004628 URI = xmlParseSystemLiteral(ctxt);
4629 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004630 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004631 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004632 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004633 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004634 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004635 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004636 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004637 }
Owen Taylor3473f882001-02-23 17:55:21 +00004638 *publicID = xmlParsePubidLiteral(ctxt);
4639 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004640 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004641 }
4642 if (strict) {
4643 /*
4644 * We don't handle [83] so "S SystemLiteral" is required.
4645 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004646 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004648 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004649 }
4650 } else {
4651 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004652 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004653 * "S SystemLiteral" is not detected. We skip blanks if no
4654 * system literal was found, but this is harmless since we must
4655 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004656 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004657 if (SKIP_BLANKS == 0) return(NULL);
4658 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004659 }
Owen Taylor3473f882001-02-23 17:55:21 +00004660 URI = xmlParseSystemLiteral(ctxt);
4661 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004662 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004663 }
4664 }
4665 return(URI);
4666}
4667
4668/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004669 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004670 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004671 * @buf: the already parsed part of the buffer
4672 * @len: number of bytes filles in the buffer
4673 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004674 *
4675 * Skip an XML (SGML) comment <!-- .... -->
4676 * The spec says that "For compatibility, the string "--" (double-hyphen)
4677 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004678 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004679 *
4680 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4681 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004682static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004683xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4684 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004685 int q, ql;
4686 int r, rl;
4687 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004688 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004689 int inputid;
4690
4691 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004692
Owen Taylor3473f882001-02-23 17:55:21 +00004693 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004694 len = 0;
4695 size = XML_PARSER_BUFFER_SIZE;
4696 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4697 if (buf == NULL) {
4698 xmlErrMemory(ctxt, NULL);
4699 return;
4700 }
Owen Taylor3473f882001-02-23 17:55:21 +00004701 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004702 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004703 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004704 if (q == 0)
4705 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004706 if (!IS_CHAR(q)) {
4707 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4708 "xmlParseComment: invalid xmlChar value %d\n",
4709 q);
4710 xmlFree (buf);
4711 return;
4712 }
Owen Taylor3473f882001-02-23 17:55:21 +00004713 NEXTL(ql);
4714 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004715 if (r == 0)
4716 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004717 if (!IS_CHAR(r)) {
4718 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4719 "xmlParseComment: invalid xmlChar value %d\n",
4720 q);
4721 xmlFree (buf);
4722 return;
4723 }
Owen Taylor3473f882001-02-23 17:55:21 +00004724 NEXTL(rl);
4725 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004726 if (cur == 0)
4727 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004728 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004729 ((cur != '>') ||
4730 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004731 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004732 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004733 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004734 if ((len > XML_MAX_TEXT_LENGTH) &&
4735 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4736 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4737 "Comment too big found", NULL);
4738 xmlFree (buf);
4739 return;
4740 }
Owen Taylor3473f882001-02-23 17:55:21 +00004741 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004742 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004743 size_t new_size;
4744
4745 new_size = size * 2;
4746 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004747 if (new_buf == NULL) {
4748 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004749 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004750 return;
4751 }
William M. Bracka3215c72004-07-31 16:24:01 +00004752 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004753 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004754 }
4755 COPY_BUF(ql,buf,len,q);
4756 q = r;
4757 ql = rl;
4758 r = cur;
4759 rl = l;
4760
4761 count++;
4762 if (count > 50) {
4763 GROW;
4764 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004765 if (ctxt->instate == XML_PARSER_EOF) {
4766 xmlFree(buf);
4767 return;
4768 }
Owen Taylor3473f882001-02-23 17:55:21 +00004769 }
4770 NEXTL(l);
4771 cur = CUR_CHAR(l);
4772 if (cur == 0) {
4773 SHRINK;
4774 GROW;
4775 cur = CUR_CHAR(l);
4776 }
4777 }
4778 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004779 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004780 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004781 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004782 } else if (!IS_CHAR(cur)) {
4783 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784 "xmlParseComment: invalid xmlChar value %d\n",
4785 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004786 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004787 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004788 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004789 "Comment doesn't start and stop in the same"
4790 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004791 }
4792 NEXT;
4793 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4794 (!ctxt->disableSAX))
4795 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004796 }
Daniel Veillardda629342007-08-01 07:49:06 +00004797 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004798 return;
4799not_terminated:
4800 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4801 "Comment not terminated\n", NULL);
4802 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004803 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004804}
Daniel Veillardda629342007-08-01 07:49:06 +00004805
Daniel Veillard4c778d82005-01-23 17:37:44 +00004806/**
4807 * xmlParseComment:
4808 * @ctxt: an XML parser context
4809 *
4810 * Skip an XML (SGML) comment <!-- .... -->
4811 * The spec says that "For compatibility, the string "--" (double-hyphen)
4812 * must not occur within comments. "
4813 *
4814 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4815 */
4816void
4817xmlParseComment(xmlParserCtxtPtr ctxt) {
4818 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004819 size_t size = XML_PARSER_BUFFER_SIZE;
4820 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004821 xmlParserInputState state;
4822 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004823 size_t nbchar = 0;
4824 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004825 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004826
4827 /*
4828 * Check that there is a comment right here.
4829 */
4830 if ((RAW != '<') || (NXT(1) != '!') ||
4831 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004832 state = ctxt->instate;
4833 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004834 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004835 SKIP(4);
4836 SHRINK;
4837 GROW;
4838
4839 /*
4840 * Accelerated common case where input don't need to be
4841 * modified before passing it to the handler.
4842 */
4843 in = ctxt->input->cur;
4844 do {
4845 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004846 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004847 ctxt->input->line++; ctxt->input->col = 1;
4848 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004849 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004850 }
4851get_more:
4852 ccol = ctxt->input->col;
4853 while (((*in > '-') && (*in <= 0x7F)) ||
4854 ((*in >= 0x20) && (*in < '-')) ||
4855 (*in == 0x09)) {
4856 in++;
4857 ccol++;
4858 }
4859 ctxt->input->col = ccol;
4860 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004861 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004862 ctxt->input->line++; ctxt->input->col = 1;
4863 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004864 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004865 goto get_more;
4866 }
4867 nbchar = in - ctxt->input->cur;
4868 /*
4869 * save current set of data
4870 */
4871 if (nbchar > 0) {
4872 if ((ctxt->sax != NULL) &&
4873 (ctxt->sax->comment != NULL)) {
4874 if (buf == NULL) {
4875 if ((*in == '-') && (in[1] == '-'))
4876 size = nbchar + 1;
4877 else
4878 size = XML_PARSER_BUFFER_SIZE + nbchar;
4879 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4880 if (buf == NULL) {
4881 xmlErrMemory(ctxt, NULL);
4882 ctxt->instate = state;
4883 return;
4884 }
4885 len = 0;
4886 } else if (len + nbchar + 1 >= size) {
4887 xmlChar *new_buf;
4888 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4889 new_buf = (xmlChar *) xmlRealloc(buf,
4890 size * sizeof(xmlChar));
4891 if (new_buf == NULL) {
4892 xmlFree (buf);
4893 xmlErrMemory(ctxt, NULL);
4894 ctxt->instate = state;
4895 return;
4896 }
4897 buf = new_buf;
4898 }
4899 memcpy(&buf[len], ctxt->input->cur, nbchar);
4900 len += nbchar;
4901 buf[len] = 0;
4902 }
4903 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004904 if ((len > XML_MAX_TEXT_LENGTH) &&
4905 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4906 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4907 "Comment too big found", NULL);
4908 xmlFree (buf);
4909 return;
4910 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004911 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004912 if (*in == 0xA) {
4913 in++;
4914 ctxt->input->line++; ctxt->input->col = 1;
4915 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004916 if (*in == 0xD) {
4917 in++;
4918 if (*in == 0xA) {
4919 ctxt->input->cur = in;
4920 in++;
4921 ctxt->input->line++; ctxt->input->col = 1;
4922 continue; /* while */
4923 }
4924 in--;
4925 }
4926 SHRINK;
4927 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004928 if (ctxt->instate == XML_PARSER_EOF) {
4929 xmlFree(buf);
4930 return;
4931 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004932 in = ctxt->input->cur;
4933 if (*in == '-') {
4934 if (in[1] == '-') {
4935 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004936 if (ctxt->input->id != inputid) {
4937 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004938 "comment doesn't start and stop in the"
4939 " same entity\n");
Daniel Veillard051d52c2008-07-29 16:44:59 +00004940 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004941 SKIP(3);
4942 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4943 (!ctxt->disableSAX)) {
4944 if (buf != NULL)
4945 ctxt->sax->comment(ctxt->userData, buf);
4946 else
4947 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4948 }
4949 if (buf != NULL)
4950 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08004951 if (ctxt->instate != XML_PARSER_EOF)
4952 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004953 return;
4954 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004955 if (buf != NULL) {
4956 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4957 "Double hyphen within comment: "
4958 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004959 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004960 } else
4961 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4962 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004963 in++;
4964 ctxt->input->col++;
4965 }
4966 in++;
4967 ctxt->input->col++;
4968 goto get_more;
4969 }
4970 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4971 xmlParseCommentComplex(ctxt, buf, len, size);
4972 ctxt->instate = state;
4973 return;
4974}
4975
Owen Taylor3473f882001-02-23 17:55:21 +00004976
4977/**
4978 * xmlParsePITarget:
4979 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004980 *
Owen Taylor3473f882001-02-23 17:55:21 +00004981 * parse the name of a PI
4982 *
4983 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4984 *
4985 * Returns the PITarget name or NULL
4986 */
4987
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004988const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004989xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004990 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004991
4992 name = xmlParseName(ctxt);
4993 if ((name != NULL) &&
4994 ((name[0] == 'x') || (name[0] == 'X')) &&
4995 ((name[1] == 'm') || (name[1] == 'M')) &&
4996 ((name[2] == 'l') || (name[2] == 'L'))) {
4997 int i;
4998 if ((name[0] == 'x') && (name[1] == 'm') &&
4999 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005001 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 return(name);
5003 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005004 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005005 return(name);
5006 }
5007 for (i = 0;;i++) {
5008 if (xmlW3CPIs[i] == NULL) break;
5009 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5010 return(name);
5011 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005012 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5013 "xmlParsePITarget: invalid name prefix 'xml'\n",
5014 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005015 }
Daniel Veillard37334572008-07-31 08:20:02 +00005016 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005017 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005018 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005019 }
Owen Taylor3473f882001-02-23 17:55:21 +00005020 return(name);
5021}
5022
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be the word "catalog", an '=' sign and a quoted URL,
 * with optional blanks around each part. A syntax error raises a
 * warning, except for a missing '=' which is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *cur, *start;
    xmlChar quote;

    for (cur = catalog; IS_BLANK_CH(*cur); cur++)
        ;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7) != 0)
        goto error;

    for (cur += 7; IS_BLANK_CH(*cur); cur++)
        ;
    if (*cur != '=')
        return;         /* no warning in this case */

    for (cur++; IS_BLANK_CH(*cur); cur++)
        ;
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto error;

    /* Look for the matching closing quote. */
    start = ++cur;
    while ((*cur != 0) && (*cur != quote))
        cur++;
    if (*cur == 0)
        goto error;
    url = xmlStrndup(start, cur - start);

    /* Only blanks may follow the quoted value. */
    for (cur++; IS_BLANK_CH(*cur); cur++)
        ;
    if (*cur != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5084
Owen Taylor3473f882001-02-23 17:55:21 +00005085/**
5086 * xmlParsePI:
5087 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005088 *
Owen Taylor3473f882001-02-23 17:55:21 +00005089 * parse an XML Processing Instruction.
5090 *
5091 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5092 *
5093 * The processing is transfered to SAX once parsed.
5094 */
5095
5096void
5097xmlParsePI(xmlParserCtxtPtr ctxt) {
5098 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005099 size_t len = 0;
5100 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005101 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005102 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005103 xmlParserInputState state;
5104 int count = 0;
5105
5106 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005107 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005108 state = ctxt->instate;
5109 ctxt->instate = XML_PARSER_PI;
5110 /*
5111 * this is a Processing Instruction.
5112 */
5113 SKIP(2);
5114 SHRINK;
5115
5116 /*
5117 * Parse the target name and check for special support like
5118 * namespace.
5119 */
5120 target = xmlParsePITarget(ctxt);
5121 if (target != NULL) {
5122 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005123 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005124 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005125 "PI declaration doesn't start and stop in"
5126 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005127 }
5128 SKIP(2);
5129
5130 /*
5131 * SAX: PI detected.
5132 */
5133 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5134 (ctxt->sax->processingInstruction != NULL))
5135 ctxt->sax->processingInstruction(ctxt->userData,
5136 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005137 if (ctxt->instate != XML_PARSER_EOF)
5138 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005139 return;
5140 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005141 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005142 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005143 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005144 ctxt->instate = state;
5145 return;
5146 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005147 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005148 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5149 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005150 }
Owen Taylor3473f882001-02-23 17:55:21 +00005151 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005152 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005153 ((cur != '?') || (NXT(1) != '>'))) {
5154 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005155 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005156 size_t new_size = size * 2;
5157 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005158 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005159 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005160 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005161 ctxt->instate = state;
5162 return;
5163 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005164 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005165 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005166 }
5167 count++;
5168 if (count > 50) {
5169 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005170 if (ctxt->instate == XML_PARSER_EOF) {
5171 xmlFree(buf);
5172 return;
5173 }
Owen Taylor3473f882001-02-23 17:55:21 +00005174 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005175 if ((len > XML_MAX_TEXT_LENGTH) &&
5176 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5177 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5178 "PI %s too big found", target);
5179 xmlFree(buf);
5180 ctxt->instate = state;
5181 return;
5182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 }
5184 COPY_BUF(l,buf,len,cur);
5185 NEXTL(l);
5186 cur = CUR_CHAR(l);
5187 if (cur == 0) {
5188 SHRINK;
5189 GROW;
5190 cur = CUR_CHAR(l);
5191 }
5192 }
Daniel Veillard51304812012-07-19 20:34:26 +08005193 if ((len > XML_MAX_TEXT_LENGTH) &&
5194 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5195 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5196 "PI %s too big found", target);
5197 xmlFree(buf);
5198 ctxt->instate = state;
5199 return;
5200 }
Owen Taylor3473f882001-02-23 17:55:21 +00005201 buf[len] = 0;
5202 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005203 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5204 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005205 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005206 if (inputid != ctxt->input->id) {
5207 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5208 "PI declaration doesn't start and stop in"
5209 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005210 }
5211 SKIP(2);
5212
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005213#ifdef LIBXML_CATALOG_ENABLED
5214 if (((state == XML_PARSER_MISC) ||
5215 (state == XML_PARSER_START)) &&
5216 (xmlStrEqual(target, XML_CATALOG_PI))) {
5217 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5218 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5219 (allow == XML_CATA_ALLOW_ALL))
5220 xmlParseCatalogPI(ctxt, buf);
5221 }
5222#endif
5223
5224
Owen Taylor3473f882001-02-23 17:55:21 +00005225 /*
5226 * SAX: PI detected.
5227 */
5228 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5229 (ctxt->sax->processingInstruction != NULL))
5230 ctxt->sax->processingInstruction(ctxt->userData,
5231 target, buf);
5232 }
5233 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005234 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005235 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005236 }
Chris Evans77404b82011-12-14 16:18:25 +08005237 if (ctxt->instate != XML_PARSER_EOF)
5238 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005239 }
5240}
5241
5242/**
5243 * xmlParseNotationDecl:
5244 * @ctxt: an XML parser context
5245 *
5246 * parse a notation declaration
5247 *
5248 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5249 *
5250 * Hence there is actually 3 choices:
5251 * 'PUBLIC' S PubidLiteral
5252 * 'PUBLIC' S PubidLiteral S SystemLiteral
5253 * and 'SYSTEM' S SystemLiteral
5254 *
5255 * See the NOTE on xmlParseExternalID().
5256 */
5257
5258void
5259xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005260 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 xmlChar *Pubid;
5262 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005263
Daniel Veillarda07050d2003-10-19 14:46:32 +00005264 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005265 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005266 SHRINK;
5267 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005268 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005269 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5270 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005271 return;
5272 }
Owen Taylor3473f882001-02-23 17:55:21 +00005273
Daniel Veillard76d66f42001-05-16 21:05:17 +00005274 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005275 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005276 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005277 return;
5278 }
Daniel Veillard37334572008-07-31 08:20:02 +00005279 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005280 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005281 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005282 name, NULL, NULL);
5283 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005284 if (SKIP_BLANKS == 0) {
5285 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5286 "Space required after the NOTATION name'\n");
5287 return;
5288 }
Owen Taylor3473f882001-02-23 17:55:21 +00005289
5290 /*
5291 * Parse the IDs.
5292 */
5293 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5294 SKIP_BLANKS;
5295
5296 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005297 if (inputid != ctxt->input->id) {
5298 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299 "Notation declaration doesn't start and stop"
5300 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005301 }
5302 NEXT;
5303 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5304 (ctxt->sax->notationDecl != NULL))
5305 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5306 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005307 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 }
Owen Taylor3473f882001-02-23 17:55:21 +00005309 if (Systemid != NULL) xmlFree(Systemid);
5310 if (Pubid != NULL) xmlFree(Pubid);
5311 }
5312}
5313
5314/**
5315 * xmlParseEntityDecl:
5316 * @ctxt: an XML parser context
5317 *
5318 * parse <!ENTITY declarations
5319 *
5320 * [70] EntityDecl ::= GEDecl | PEDecl
5321 *
5322 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5323 *
5324 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5325 *
5326 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5327 *
5328 * [74] PEDef ::= EntityValue | ExternalID
5329 *
5330 * [76] NDataDecl ::= S 'NDATA' S Name
5331 *
5332 * [ VC: Notation Declared ]
5333 * The Name must match the declared name of a notation.
5334 */
5335
5336void
5337xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005338 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005339 xmlChar *value = NULL;
5340 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005341 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 int isParameter = 0;
5343 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005344
Daniel Veillard4c778d82005-01-23 17:37:44 +00005345 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005346 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005347 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005348 SHRINK;
5349 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005350 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005353 }
Owen Taylor3473f882001-02-23 17:55:21 +00005354
5355 if (RAW == '%') {
5356 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005357 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005358 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005359 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005360 }
Owen Taylor3473f882001-02-23 17:55:21 +00005361 isParameter = 1;
5362 }
5363
Daniel Veillard76d66f42001-05-16 21:05:17 +00005364 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005365 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005366 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5367 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005368 return;
5369 }
Daniel Veillard37334572008-07-31 08:20:02 +00005370 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005371 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005372 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005373 name, NULL, NULL);
5374 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005375 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5377 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005378 }
Owen Taylor3473f882001-02-23 17:55:21 +00005379
Daniel Veillardf5582f12002-06-11 10:08:16 +00005380 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005381 /*
5382 * handle the various case of definitions...
5383 */
5384 if (isParameter) {
5385 if ((RAW == '"') || (RAW == '\'')) {
5386 value = xmlParseEntityValue(ctxt, &orig);
5387 if (value) {
5388 if ((ctxt->sax != NULL) &&
5389 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5390 ctxt->sax->entityDecl(ctxt->userData, name,
5391 XML_INTERNAL_PARAMETER_ENTITY,
5392 NULL, NULL, value);
5393 }
5394 } else {
5395 URI = xmlParseExternalID(ctxt, &literal, 1);
5396 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005397 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005398 }
5399 if (URI) {
5400 xmlURIPtr uri;
5401
5402 uri = xmlParseURI((const char *) URI);
5403 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005404 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5405 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005406 /*
5407 * This really ought to be a well formedness error
5408 * but the XML Core WG decided otherwise c.f. issue
5409 * E26 of the XML erratas.
5410 */
Owen Taylor3473f882001-02-23 17:55:21 +00005411 } else {
5412 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005413 /*
5414 * Okay this is foolish to block those but not
5415 * invalid URIs.
5416 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005417 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 } else {
5419 if ((ctxt->sax != NULL) &&
5420 (!ctxt->disableSAX) &&
5421 (ctxt->sax->entityDecl != NULL))
5422 ctxt->sax->entityDecl(ctxt->userData, name,
5423 XML_EXTERNAL_PARAMETER_ENTITY,
5424 literal, URI, NULL);
5425 }
5426 xmlFreeURI(uri);
5427 }
5428 }
5429 }
5430 } else {
5431 if ((RAW == '"') || (RAW == '\'')) {
5432 value = xmlParseEntityValue(ctxt, &orig);
5433 if ((ctxt->sax != NULL) &&
5434 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5435 ctxt->sax->entityDecl(ctxt->userData, name,
5436 XML_INTERNAL_GENERAL_ENTITY,
5437 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005438 /*
5439 * For expat compatibility in SAX mode.
5440 */
5441 if ((ctxt->myDoc == NULL) ||
5442 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5443 if (ctxt->myDoc == NULL) {
5444 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005445 if (ctxt->myDoc == NULL) {
5446 xmlErrMemory(ctxt, "New Doc failed");
5447 return;
5448 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005449 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005450 }
5451 if (ctxt->myDoc->intSubset == NULL)
5452 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5453 BAD_CAST "fake", NULL, NULL);
5454
Daniel Veillard1af9a412003-08-20 22:54:39 +00005455 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5456 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005457 }
Owen Taylor3473f882001-02-23 17:55:21 +00005458 } else {
5459 URI = xmlParseExternalID(ctxt, &literal, 1);
5460 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005461 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005462 }
5463 if (URI) {
5464 xmlURIPtr uri;
5465
5466 uri = xmlParseURI((const char *)URI);
5467 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005468 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5469 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005470 /*
5471 * This really ought to be a well formedness error
5472 * but the XML Core WG decided otherwise c.f. issue
5473 * E26 of the XML erratas.
5474 */
Owen Taylor3473f882001-02-23 17:55:21 +00005475 } else {
5476 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005477 /*
5478 * Okay this is foolish to block those but not
5479 * invalid URIs.
5480 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005481 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005482 }
5483 xmlFreeURI(uri);
5484 }
5485 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005486 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5488 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005489 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005490 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005491 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005492 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005493 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005495 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005496 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5498 (ctxt->sax->unparsedEntityDecl != NULL))
5499 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5500 literal, URI, ndata);
5501 } else {
5502 if ((ctxt->sax != NULL) &&
5503 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5504 ctxt->sax->entityDecl(ctxt->userData, name,
5505 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5506 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005507 /*
5508 * For expat compatibility in SAX mode.
5509 * assuming the entity repalcement was asked for
5510 */
5511 if ((ctxt->replaceEntities != 0) &&
5512 ((ctxt->myDoc == NULL) ||
5513 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5514 if (ctxt->myDoc == NULL) {
5515 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005516 if (ctxt->myDoc == NULL) {
5517 xmlErrMemory(ctxt, "New Doc failed");
5518 return;
5519 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005520 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005521 }
5522
5523 if (ctxt->myDoc->intSubset == NULL)
5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005526 xmlSAX2EntityDecl(ctxt, name,
5527 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5528 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005529 }
Owen Taylor3473f882001-02-23 17:55:21 +00005530 }
5531 }
5532 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005533 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005534 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005535 SKIP_BLANKS;
5536 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005537 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005538 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005539 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005540 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005541 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005542 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005543 "Entity declaration doesn't start and stop in"
5544 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005545 }
5546 NEXT;
5547 }
5548 if (orig != NULL) {
5549 /*
5550 * Ugly mechanism to save the raw entity value.
5551 */
5552 xmlEntityPtr cur = NULL;
5553
5554 if (isParameter) {
5555 if ((ctxt->sax != NULL) &&
5556 (ctxt->sax->getParameterEntity != NULL))
5557 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5558 } else {
5559 if ((ctxt->sax != NULL) &&
5560 (ctxt->sax->getEntity != NULL))
5561 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005562 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005563 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005564 }
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005566 if ((cur != NULL) && (cur->orig == NULL)) {
5567 cur->orig = orig;
5568 orig = NULL;
5569 }
Owen Taylor3473f882001-02-23 17:55:21 +00005570 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005571
5572done:
Owen Taylor3473f882001-02-23 17:55:21 +00005573 if (value != NULL) xmlFree(value);
5574 if (URI != NULL) xmlFree(URI);
5575 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005576 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005577 }
5578}
5579
5580/**
5581 * xmlParseDefaultDecl:
5582 * @ctxt: an XML parser context
5583 * @value: Receive a possible fixed default value for the attribute
5584 *
5585 * Parse an attribute default declaration
5586 *
5587 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5588 *
5589 * [ VC: Required Attribute ]
5590 * if the default declaration is the keyword #REQUIRED, then the
5591 * attribute must be specified for all elements of the type in the
5592 * attribute-list declaration.
5593 *
5594 * [ VC: Attribute Default Legal ]
5595 * The declared default value must meet the lexical constraints of
5596 * the declared attribute type c.f. xmlValidateAttributeDecl()
5597 *
5598 * [ VC: Fixed Attribute Default ]
5599 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005600 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005601 *
5602 * [ WFC: No < in Attribute Values ]
5603 * handled in xmlParseAttValue()
5604 *
5605 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005606 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005607 */
5608
5609int
5610xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5611 int val;
5612 xmlChar *ret;
5613
5614 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005615 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005616 SKIP(9);
5617 return(XML_ATTRIBUTE_REQUIRED);
5618 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005619 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005620 SKIP(8);
5621 return(XML_ATTRIBUTE_IMPLIED);
5622 }
5623 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005624 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005625 SKIP(6);
5626 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005627 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005630 }
Owen Taylor3473f882001-02-23 17:55:21 +00005631 }
5632 ret = xmlParseAttValue(ctxt);
5633 ctxt->instate = XML_PARSER_DTD;
5634 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005635 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005636 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005637 } else
5638 *value = ret;
5639 return(val);
5640}
5641
5642/**
5643 * xmlParseNotationType:
5644 * @ctxt: an XML parser context
5645 *
5646 * parse an Notation attribute type.
5647 *
5648 * Note: the leading 'NOTATION' S part has already being parsed...
5649 *
5650 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5651 *
5652 * [ VC: Notation Attributes ]
5653 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005654 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005655 *
5656 * Returns: the notation attribute tree built while parsing
5657 */
5658
5659xmlEnumerationPtr
5660xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005661 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005662 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005663
5664 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005665 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005666 return(NULL);
5667 }
5668 SHRINK;
5669 do {
5670 NEXT;
5671 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005672 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005673 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005674 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5675 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005676 xmlFreeEnumeration(ret);
5677 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005678 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005679 tmp = ret;
5680 while (tmp != NULL) {
5681 if (xmlStrEqual(name, tmp->name)) {
5682 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5683 "standalone: attribute notation value token %s duplicated\n",
5684 name, NULL);
5685 if (!xmlDictOwns(ctxt->dict, name))
5686 xmlFree((xmlChar *) name);
5687 break;
5688 }
5689 tmp = tmp->next;
5690 }
5691 if (tmp == NULL) {
5692 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005693 if (cur == NULL) {
5694 xmlFreeEnumeration(ret);
5695 return(NULL);
5696 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005697 if (last == NULL) ret = last = cur;
5698 else {
5699 last->next = cur;
5700 last = cur;
5701 }
Owen Taylor3473f882001-02-23 17:55:21 +00005702 }
5703 SKIP_BLANKS;
5704 } while (RAW == '|');
5705 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005706 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005707 xmlFreeEnumeration(ret);
5708 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005709 }
5710 NEXT;
5711 return(ret);
5712}
5713
5714/**
5715 * xmlParseEnumerationType:
5716 * @ctxt: an XML parser context
5717 *
5718 * parse an Enumeration attribute type.
5719 *
5720 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5721 *
5722 * [ VC: Enumeration ]
5723 * Values of this type must match one of the Nmtoken tokens in
5724 * the declaration
5725 *
5726 * Returns: the enumeration attribute tree built while parsing
5727 */
5728
5729xmlEnumerationPtr
5730xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5731 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005732 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005733
5734 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005735 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005736 return(NULL);
5737 }
5738 SHRINK;
5739 do {
5740 NEXT;
5741 SKIP_BLANKS;
5742 name = xmlParseNmtoken(ctxt);
5743 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005744 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 return(ret);
5746 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005747 tmp = ret;
5748 while (tmp != NULL) {
5749 if (xmlStrEqual(name, tmp->name)) {
5750 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5751 "standalone: attribute enumeration value token %s duplicated\n",
5752 name, NULL);
5753 if (!xmlDictOwns(ctxt->dict, name))
5754 xmlFree(name);
5755 break;
5756 }
5757 tmp = tmp->next;
5758 }
5759 if (tmp == NULL) {
5760 cur = xmlCreateEnumeration(name);
5761 if (!xmlDictOwns(ctxt->dict, name))
5762 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005763 if (cur == NULL) {
5764 xmlFreeEnumeration(ret);
5765 return(NULL);
5766 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005767 if (last == NULL) ret = last = cur;
5768 else {
5769 last->next = cur;
5770 last = cur;
5771 }
Owen Taylor3473f882001-02-23 17:55:21 +00005772 }
5773 SKIP_BLANKS;
5774 } while (RAW == '|');
5775 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005776 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005777 return(ret);
5778 }
5779 NEXT;
5780 return(ret);
5781}
5782
5783/**
5784 * xmlParseEnumeratedType:
5785 * @ctxt: an XML parser context
5786 * @tree: the enumeration tree built while parsing
5787 *
5788 * parse an Enumerated attribute type.
5789 *
5790 * [57] EnumeratedType ::= NotationType | Enumeration
5791 *
5792 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5793 *
5794 *
5795 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5796 */
5797
5798int
5799xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005800 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005801 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005802 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005803 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5804 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005805 return(0);
5806 }
Owen Taylor3473f882001-02-23 17:55:21 +00005807 *tree = xmlParseNotationType(ctxt);
5808 if (*tree == NULL) return(0);
5809 return(XML_ATTRIBUTE_NOTATION);
5810 }
5811 *tree = xmlParseEnumerationType(ctxt);
5812 if (*tree == NULL) return(0);
5813 return(XML_ATTRIBUTE_ENUMERATION);
5814}
5815
5816/**
5817 * xmlParseAttributeType:
5818 * @ctxt: an XML parser context
5819 * @tree: the enumeration tree built while parsing
5820 *
5821 * parse the Attribute list def for an element
5822 *
5823 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5824 *
5825 * [55] StringType ::= 'CDATA'
5826 *
5827 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5828 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5829 *
5830 * Validity constraints for attribute values syntax are checked in
5831 * xmlValidateAttributeValue()
5832 *
5833 * [ VC: ID ]
5834 * Values of type ID must match the Name production. A name must not
5835 * appear more than once in an XML document as a value of this type;
5836 * i.e., ID values must uniquely identify the elements which bear them.
5837 *
5838 * [ VC: One ID per Element Type ]
5839 * No element type may have more than one ID attribute specified.
5840 *
5841 * [ VC: ID Attribute Default ]
5842 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5843 *
5844 * [ VC: IDREF ]
5845 * Values of type IDREF must match the Name production, and values
5846 * of type IDREFS must match Names; each IDREF Name must match the value
5847 * of an ID attribute on some element in the XML document; i.e. IDREF
5848 * values must match the value of some ID attribute.
5849 *
5850 * [ VC: Entity Name ]
5851 * Values of type ENTITY must match the Name production, values
5852 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005853 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005854 *
5855 * [ VC: Name Token ]
5856 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005857 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005858 *
5859 * Returns the attribute type
5860 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005861int
Owen Taylor3473f882001-02-23 17:55:21 +00005862xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5863 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005864 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005865 SKIP(5);
5866 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005867 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005868 SKIP(6);
5869 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005870 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005871 SKIP(5);
5872 return(XML_ATTRIBUTE_IDREF);
5873 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5874 SKIP(2);
5875 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005876 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005877 SKIP(6);
5878 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005879 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005880 SKIP(8);
5881 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005882 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005883 SKIP(8);
5884 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005885 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005886 SKIP(7);
5887 return(XML_ATTRIBUTE_NMTOKEN);
5888 }
5889 return(xmlParseEnumeratedType(ctxt, tree));
5890}
5891
5892/**
5893 * xmlParseAttributeListDecl:
5894 * @ctxt: an XML parser context
5895 *
5896 * : parse the Attribute list def for an element
5897 *
5898 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5899 *
5900 * [53] AttDef ::= S Name S AttType S DefaultDecl
5901 *
5902 */
5903void
5904xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005905 const xmlChar *elemName;
5906 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005907 xmlEnumerationPtr tree;
5908
Daniel Veillarda07050d2003-10-19 14:46:32 +00005909 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005910 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005911
5912 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005913 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005915 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005916 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005917 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005918 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005919 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5920 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005921 return;
5922 }
5923 SKIP_BLANKS;
5924 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005925 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005926 int type;
5927 int def;
5928 xmlChar *defaultValue = NULL;
5929
5930 GROW;
5931 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005932 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005934 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5935 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005936 break;
5937 }
5938 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005939 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005940 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005941 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005942 break;
5943 }
Owen Taylor3473f882001-02-23 17:55:21 +00005944
5945 type = xmlParseAttributeType(ctxt, &tree);
5946 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005947 break;
5948 }
5949
5950 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005951 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005952 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5953 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005954 if (tree != NULL)
5955 xmlFreeEnumeration(tree);
5956 break;
5957 }
Owen Taylor3473f882001-02-23 17:55:21 +00005958
5959 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5960 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005961 if (defaultValue != NULL)
5962 xmlFree(defaultValue);
5963 if (tree != NULL)
5964 xmlFreeEnumeration(tree);
5965 break;
5966 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005967 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5968 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005969
5970 GROW;
5971 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005972 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005973 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005974 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005975 if (defaultValue != NULL)
5976 xmlFree(defaultValue);
5977 if (tree != NULL)
5978 xmlFreeEnumeration(tree);
5979 break;
5980 }
Owen Taylor3473f882001-02-23 17:55:21 +00005981 }
Owen Taylor3473f882001-02-23 17:55:21 +00005982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5983 (ctxt->sax->attributeDecl != NULL))
5984 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5985 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005986 else if (tree != NULL)
5987 xmlFreeEnumeration(tree);
5988
5989 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005990 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00005991 (def != XML_ATTRIBUTE_REQUIRED)) {
5992 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5993 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005994 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005995 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5996 }
Owen Taylor3473f882001-02-23 17:55:21 +00005997 if (defaultValue != NULL)
5998 xmlFree(defaultValue);
5999 GROW;
6000 }
6001 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006002 if (inputid != ctxt->input->id) {
6003 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6004 "Attribute list declaration doesn't start and"
6005 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006006 }
6007 NEXT;
6008 }
Owen Taylor3473f882001-02-23 17:55:21 +00006009 }
6010}
6011
6012/**
6013 * xmlParseElementMixedContentDecl:
6014 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006015 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006016 *
6017 * parse the declaration for a Mixed Element content
6018 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006019 *
Owen Taylor3473f882001-02-23 17:55:21 +00006020 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6021 * '(' S? '#PCDATA' S? ')'
6022 *
6023 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6024 *
6025 * [ VC: No Duplicate Types ]
6026 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006027 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006028 *
6029 * returns: the list of the xmlElementContentPtr describing the element choices
6030 */
6031xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006032xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006033 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006034 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006035
6036 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006037 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006038 SKIP(7);
6039 SKIP_BLANKS;
6040 SHRINK;
6041 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006042 if (ctxt->input->id != inputchk) {
6043 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6044 "Element content declaration doesn't start and"
6045 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006046 }
Owen Taylor3473f882001-02-23 17:55:21 +00006047 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006048 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006049 if (ret == NULL)
6050 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006051 if (RAW == '*') {
6052 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6053 NEXT;
6054 }
6055 return(ret);
6056 }
6057 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006058 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006059 if (ret == NULL) return(NULL);
6060 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006061 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006062 NEXT;
6063 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006064 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006065 if (ret == NULL) return(NULL);
6066 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006067 if (cur != NULL)
6068 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006069 cur = ret;
6070 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006071 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006072 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006073 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006074 if (n->c1 != NULL)
6075 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006076 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006077 if (n != NULL)
6078 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006079 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006080 }
6081 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006082 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006083 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006084 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006085 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006086 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006087 return(NULL);
6088 }
6089 SKIP_BLANKS;
6090 GROW;
6091 }
6092 if ((RAW == ')') && (NXT(1) == '*')) {
6093 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006094 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006095 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006096 if (cur->c2 != NULL)
6097 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006098 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006099 if (ret != NULL)
6100 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006101 if (ctxt->input->id != inputchk) {
6102 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6103 "Element content declaration doesn't start and"
6104 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006105 }
Owen Taylor3473f882001-02-23 17:55:21 +00006106 SKIP(2);
6107 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006108 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006109 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 return(NULL);
6111 }
6112
6113 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006114 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006115 }
6116 return(ret);
6117}
6118
6119/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006120 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006121 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006122 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006123 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006124 *
6125 * parse the declaration for a Mixed Element content
6126 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006127 *
Owen Taylor3473f882001-02-23 17:55:21 +00006128 *
6129 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6130 *
6131 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6132 *
6133 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6134 *
6135 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6136 *
6137 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6138 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006139 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006140 * opening or closing parentheses in a choice, seq, or Mixed
6141 * construct is contained in the replacement text for a parameter
6142 * entity, both must be contained in the same replacement text. For
6143 * interoperability, if a parameter-entity reference appears in a
6144 * choice, seq, or Mixed construct, its replacement text should not
6145 * be empty, and neither the first nor last non-blank character of
6146 * the replacement text should be a connector (| or ,).
6147 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006148 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006149 * hierarchy.
6150 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006151static xmlElementContentPtr
6152xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6153 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006154 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006155 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006156 xmlChar type = 0;
6157
Daniel Veillard489f9672009-08-10 16:49:30 +02006158 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6159 (depth > 2048)) {
6160 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6161"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6162 depth);
6163 return(NULL);
6164 }
Owen Taylor3473f882001-02-23 17:55:21 +00006165 SKIP_BLANKS;
6166 GROW;
6167 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006168 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006169
Owen Taylor3473f882001-02-23 17:55:21 +00006170 /* Recurse on first child */
6171 NEXT;
6172 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006173 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6174 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006175 SKIP_BLANKS;
6176 GROW;
6177 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006178 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006179 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006180 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006181 return(NULL);
6182 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006183 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006184 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006185 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006186 return(NULL);
6187 }
Owen Taylor3473f882001-02-23 17:55:21 +00006188 GROW;
6189 if (RAW == '?') {
6190 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6191 NEXT;
6192 } else if (RAW == '*') {
6193 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6194 NEXT;
6195 } else if (RAW == '+') {
6196 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6197 NEXT;
6198 } else {
6199 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6200 }
Owen Taylor3473f882001-02-23 17:55:21 +00006201 GROW;
6202 }
6203 SKIP_BLANKS;
6204 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006205 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006206 /*
6207 * Each loop we parse one separator and one element.
6208 */
6209 if (RAW == ',') {
6210 if (type == 0) type = CUR;
6211
6212 /*
6213 * Detect "Name | Name , Name" error
6214 */
6215 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006216 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006217 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006218 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006219 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006220 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006221 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006222 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006223 return(NULL);
6224 }
6225 NEXT;
6226
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006227 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006228 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006229 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006230 xmlFreeDocElementContent(ctxt->myDoc, last);
6231 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006232 return(NULL);
6233 }
6234 if (last == NULL) {
6235 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006236 if (ret != NULL)
6237 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006238 ret = cur = op;
6239 } else {
6240 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006241 if (op != NULL)
6242 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006243 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006244 if (last != NULL)
6245 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006246 cur =op;
6247 last = NULL;
6248 }
6249 } else if (RAW == '|') {
6250 if (type == 0) type = CUR;
6251
6252 /*
6253 * Detect "Name , Name | Name" error
6254 */
6255 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006256 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006257 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006258 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006259 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006260 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006261 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006262 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006263 return(NULL);
6264 }
6265 NEXT;
6266
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006267 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006269 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006270 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006271 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006272 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006273 return(NULL);
6274 }
6275 if (last == NULL) {
6276 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006277 if (ret != NULL)
6278 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006279 ret = cur = op;
6280 } else {
6281 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006282 if (op != NULL)
6283 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006284 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006285 if (last != NULL)
6286 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006287 cur =op;
6288 last = NULL;
6289 }
6290 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006291 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006292 if ((last != NULL) && (last != ret))
6293 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006294 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006295 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006296 return(NULL);
6297 }
6298 GROW;
6299 SKIP_BLANKS;
6300 GROW;
6301 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006302 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006303 /* Recurse on second child */
6304 NEXT;
6305 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006306 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6307 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006308 SKIP_BLANKS;
6309 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006310 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006311 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006312 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006313 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006314 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006315 return(NULL);
6316 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006317 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006318 if (last == NULL) {
6319 if (ret != NULL)
6320 xmlFreeDocElementContent(ctxt->myDoc, ret);
6321 return(NULL);
6322 }
Owen Taylor3473f882001-02-23 17:55:21 +00006323 if (RAW == '?') {
6324 last->ocur = XML_ELEMENT_CONTENT_OPT;
6325 NEXT;
6326 } else if (RAW == '*') {
6327 last->ocur = XML_ELEMENT_CONTENT_MULT;
6328 NEXT;
6329 } else if (RAW == '+') {
6330 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6331 NEXT;
6332 } else {
6333 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6334 }
6335 }
6336 SKIP_BLANKS;
6337 GROW;
6338 }
6339 if ((cur != NULL) && (last != NULL)) {
6340 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006341 if (last != NULL)
6342 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006343 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006344 if (ctxt->input->id != inputchk) {
6345 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346 "Element content declaration doesn't start and stop in"
6347 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006348 }
Owen Taylor3473f882001-02-23 17:55:21 +00006349 NEXT;
6350 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006351 if (ret != NULL) {
6352 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6353 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6355 else
6356 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6357 }
Owen Taylor3473f882001-02-23 17:55:21 +00006358 NEXT;
6359 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006360 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006361 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006362 cur = ret;
6363 /*
6364 * Some normalization:
6365 * (a | b* | c?)* == (a | b | c)*
6366 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006367 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006368 if ((cur->c1 != NULL) &&
6369 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6370 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6371 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6372 if ((cur->c2 != NULL) &&
6373 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6374 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6375 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6376 cur = cur->c2;
6377 }
6378 }
Owen Taylor3473f882001-02-23 17:55:21 +00006379 NEXT;
6380 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006381 if (ret != NULL) {
6382 int found = 0;
6383
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006384 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6385 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6386 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006387 else
6388 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006389 /*
6390 * Some normalization:
6391 * (a | b*)+ == (a | b)*
6392 * (a | b?)+ == (a | b)*
6393 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006394 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006395 if ((cur->c1 != NULL) &&
6396 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6397 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6398 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6399 found = 1;
6400 }
6401 if ((cur->c2 != NULL) &&
6402 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6403 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6404 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6405 found = 1;
6406 }
6407 cur = cur->c2;
6408 }
6409 if (found)
6410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6411 }
Owen Taylor3473f882001-02-23 17:55:21 +00006412 NEXT;
6413 }
6414 return(ret);
6415}
6416
6417/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006418 * xmlParseElementChildrenContentDecl:
6419 * @ctxt: an XML parser context
6420 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006421 *
6422 * parse the declaration for a Mixed Element content
6423 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6424 *
6425 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6426 *
6427 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6428 *
6429 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6430 *
6431 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6432 *
6433 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6434 * TODO Parameter-entity replacement text must be properly nested
6435 * with parenthesized groups. That is to say, if either of the
6436 * opening or closing parentheses in a choice, seq, or Mixed
6437 * construct is contained in the replacement text for a parameter
6438 * entity, both must be contained in the same replacement text. For
6439 * interoperability, if a parameter-entity reference appears in a
6440 * choice, seq, or Mixed construct, its replacement text should not
6441 * be empty, and neither the first nor last non-blank character of
6442 * the replacement text should be a connector (| or ,).
6443 *
6444 * Returns the tree of xmlElementContentPtr describing the element
6445 * hierarchy.
6446 */
6447xmlElementContentPtr
6448xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6449 /* stub left for API/ABI compat */
6450 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6451}
6452
6453/**
Owen Taylor3473f882001-02-23 17:55:21 +00006454 * xmlParseElementContentDecl:
6455 * @ctxt: an XML parser context
6456 * @name: the name of the element being defined.
6457 * @result: the Element Content pointer will be stored here if any
6458 *
6459 * parse the declaration for an Element content either Mixed or Children,
6460 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006461 *
Owen Taylor3473f882001-02-23 17:55:21 +00006462 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6463 *
6464 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6465 */
6466
6467int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006468xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006469 xmlElementContentPtr *result) {
6470
6471 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006472 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006473 int res;
6474
6475 *result = NULL;
6476
6477 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006478 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006479 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006480 return(-1);
6481 }
6482 NEXT;
6483 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006484 if (ctxt->instate == XML_PARSER_EOF)
6485 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006486 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006487 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006488 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006489 res = XML_ELEMENT_TYPE_MIXED;
6490 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006491 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 res = XML_ELEMENT_TYPE_ELEMENT;
6493 }
Owen Taylor3473f882001-02-23 17:55:21 +00006494 SKIP_BLANKS;
6495 *result = tree;
6496 return(res);
6497}
6498
6499/**
6500 * xmlParseElementDecl:
6501 * @ctxt: an XML parser context
6502 *
6503 * parse an Element declaration.
6504 *
6505 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6506 *
6507 * [ VC: Unique Element Type Declaration ]
6508 * No element type may be declared more than once
6509 *
6510 * Returns the type of the element, or -1 in case of error
6511 */
6512int
6513xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006514 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006515 int ret = -1;
6516 xmlElementContentPtr content = NULL;
6517
Daniel Veillard4c778d82005-01-23 17:37:44 +00006518 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006519 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006520 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006521
6522 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006523 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006524 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6525 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006526 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006527 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006528 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006529 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006530 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6531 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006532 return(-1);
6533 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006534 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006535 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6536 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006537 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006538 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006539 SKIP(5);
6540 /*
6541 * Element must always be empty.
6542 */
6543 ret = XML_ELEMENT_TYPE_EMPTY;
6544 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6545 (NXT(2) == 'Y')) {
6546 SKIP(3);
6547 /*
6548 * Element is a generic container.
6549 */
6550 ret = XML_ELEMENT_TYPE_ANY;
6551 } else if (RAW == '(') {
6552 ret = xmlParseElementContentDecl(ctxt, name, &content);
6553 } else {
6554 /*
6555 * [ WFC: PEs in Internal Subset ] error handling.
6556 */
6557 if ((RAW == '%') && (ctxt->external == 0) &&
6558 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006559 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006560 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006561 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006562 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006563 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6564 }
Owen Taylor3473f882001-02-23 17:55:21 +00006565 return(-1);
6566 }
6567
6568 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006569
6570 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006571 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006572 if (content != NULL) {
6573 xmlFreeDocElementContent(ctxt->myDoc, content);
6574 }
Owen Taylor3473f882001-02-23 17:55:21 +00006575 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006576 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006577 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006578 "Element declaration doesn't start and stop in"
6579 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006580 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006581
Owen Taylor3473f882001-02-23 17:55:21 +00006582 NEXT;
6583 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006584 (ctxt->sax->elementDecl != NULL)) {
6585 if (content != NULL)
6586 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006587 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6588 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006589 if ((content != NULL) && (content->parent == NULL)) {
6590 /*
6591 * this is a trick: if xmlAddElementDecl is called,
6592 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006593 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006594 * interfaces or change the API/ABI
6595 */
6596 xmlFreeDocElementContent(ctxt->myDoc, content);
6597 }
6598 } else if (content != NULL) {
6599 xmlFreeDocElementContent(ctxt->myDoc, content);
6600 }
Owen Taylor3473f882001-02-23 17:55:21 +00006601 }
Owen Taylor3473f882001-02-23 17:55:21 +00006602 }
6603 return(ret);
6604}
6605
6606/**
Owen Taylor3473f882001-02-23 17:55:21 +00006607 * xmlParseConditionalSections
6608 * @ctxt: an XML parser context
6609 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006610 * [61] conditionalSect ::= includeSect | ignoreSect
6611 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006612 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6613 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6614 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6615 */
6616
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006617static void
Owen Taylor3473f882001-02-23 17:55:21 +00006618xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006619 int id = ctxt->input->id;
6620
Owen Taylor3473f882001-02-23 17:55:21 +00006621 SKIP(3);
6622 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006623 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006624 SKIP(7);
6625 SKIP_BLANKS;
6626 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006627 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006628 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006629 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006630 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006631 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006632 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6633 "All markup of the conditional section is not"
6634 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006635 }
Owen Taylor3473f882001-02-23 17:55:21 +00006636 NEXT;
6637 }
6638 if (xmlParserDebugEntities) {
6639 if ((ctxt->input != NULL) && (ctxt->input->filename))
6640 xmlGenericError(xmlGenericErrorContext,
6641 "%s(%d): ", ctxt->input->filename,
6642 ctxt->input->line);
6643 xmlGenericError(xmlGenericErrorContext,
6644 "Entering INCLUDE Conditional Section\n");
6645 }
6646
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006647 SKIP_BLANKS;
6648 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006649 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6650 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006651 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006652 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006653
6654 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6655 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006656 } else
6657 xmlParseMarkupDecl(ctxt);
6658
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006659 SKIP_BLANKS;
6660 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006661
Daniel Veillardfdc91562002-07-01 21:52:03 +00006662 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006663 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006664 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006665 break;
6666 }
6667 }
6668 if (xmlParserDebugEntities) {
6669 if ((ctxt->input != NULL) && (ctxt->input->filename))
6670 xmlGenericError(xmlGenericErrorContext,
6671 "%s(%d): ", ctxt->input->filename,
6672 ctxt->input->line);
6673 xmlGenericError(xmlGenericErrorContext,
6674 "Leaving INCLUDE Conditional Section\n");
6675 }
6676
Daniel Veillarda07050d2003-10-19 14:46:32 +00006677 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006678 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006679 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006680 int depth = 0;
6681
6682 SKIP(6);
6683 SKIP_BLANKS;
6684 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006685 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006686 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006687 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006688 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006689 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6691 "All markup of the conditional section is not"
6692 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006693 }
Owen Taylor3473f882001-02-23 17:55:21 +00006694 NEXT;
6695 }
6696 if (xmlParserDebugEntities) {
6697 if ((ctxt->input != NULL) && (ctxt->input->filename))
6698 xmlGenericError(xmlGenericErrorContext,
6699 "%s(%d): ", ctxt->input->filename,
6700 ctxt->input->line);
6701 xmlGenericError(xmlGenericErrorContext,
6702 "Entering IGNORE Conditional Section\n");
6703 }
6704
6705 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006706 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006707 * But disable SAX event generating DTD building in the meantime
6708 */
6709 state = ctxt->disableSAX;
6710 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006712 ctxt->instate = XML_PARSER_IGNORE;
6713
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006714 while (((depth >= 0) && (RAW != 0)) &&
6715 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006716 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6717 depth++;
6718 SKIP(3);
6719 continue;
6720 }
6721 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6722 if (--depth >= 0) SKIP(3);
6723 continue;
6724 }
6725 NEXT;
6726 continue;
6727 }
6728
6729 ctxt->disableSAX = state;
6730 ctxt->instate = instate;
6731
6732 if (xmlParserDebugEntities) {
6733 if ((ctxt->input != NULL) && (ctxt->input->filename))
6734 xmlGenericError(xmlGenericErrorContext,
6735 "%s(%d): ", ctxt->input->filename,
6736 ctxt->input->line);
6737 xmlGenericError(xmlGenericErrorContext,
6738 "Leaving IGNORE Conditional Section\n");
6739 }
6740
6741 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006742 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006743 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006744 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006745 }
6746
6747 if (RAW == 0)
6748 SHRINK;
6749
6750 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006751 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006752 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006753 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006754 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6755 "All markup of the conditional section is not in"
6756 " the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006757 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006758 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006759 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006760 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006761 }
6762}
6763
6764/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006765 * xmlParseMarkupDecl:
6766 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006767 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006768 * parse Markup declarations
6769 *
6770 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6771 * NotationDecl | PI | Comment
6772 *
6773 * [ VC: Proper Declaration/PE Nesting ]
6774 * Parameter-entity replacement text must be properly nested with
6775 * markup declarations. That is to say, if either the first character
6776 * or the last character of a markup declaration (markupdecl above) is
6777 * contained in the replacement text for a parameter-entity reference,
6778 * both must be contained in the same replacement text.
6779 *
6780 * [ WFC: PEs in Internal Subset ]
6781 * In the internal DTD subset, parameter-entity references can occur
6782 * only where markup declarations can occur, not within markup declarations.
6783 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006784 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006785 */
6786void
6787xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6788 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006789 if (CUR == '<') {
6790 if (NXT(1) == '!') {
6791 switch (NXT(2)) {
6792 case 'E':
6793 if (NXT(3) == 'L')
6794 xmlParseElementDecl(ctxt);
6795 else if (NXT(3) == 'N')
6796 xmlParseEntityDecl(ctxt);
6797 break;
6798 case 'A':
6799 xmlParseAttributeListDecl(ctxt);
6800 break;
6801 case 'N':
6802 xmlParseNotationDecl(ctxt);
6803 break;
6804 case '-':
6805 xmlParseComment(ctxt);
6806 break;
6807 default:
6808 /* there is an error but it will be detected later */
6809 break;
6810 }
6811 } else if (NXT(1) == '?') {
6812 xmlParsePI(ctxt);
6813 }
6814 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006815
6816 /*
6817 * detect requirement to exit there and act accordingly
6818 * and avoid having instate overriden later on
6819 */
6820 if (ctxt->instate == XML_PARSER_EOF)
6821 return;
6822
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006823 /*
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006824 * Conditional sections are allowed from entities included
6825 * by PE References in the internal subset.
6826 */
6827 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6828 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6829 xmlParseConditionalSections(ctxt);
6830 }
6831 }
6832
6833 ctxt->instate = XML_PARSER_DTD;
6834}
6835
6836/**
6837 * xmlParseTextDecl:
6838 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006839 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006840 * parse an XML declaration header for external entities
6841 *
6842 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006843 */
6844
6845void
6846xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6847 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006848 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006849
6850 /*
6851 * We know that '<?xml' is here.
6852 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006853 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006854 SKIP(5);
6855 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006856 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006857 return;
6858 }
6859
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006860 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006861 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6862 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006863 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006864
6865 /*
6866 * We may have the VersionInfo here.
6867 */
6868 version = xmlParseVersionInfo(ctxt);
6869 if (version == NULL)
6870 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006871 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006872 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6874 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006875 }
6876 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006877 ctxt->input->version = version;
6878
6879 /*
6880 * We must have the encoding declaration
6881 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006882 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006883 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6884 /*
6885 * The XML REC instructs us to stop parsing right here
6886 */
6887 return;
6888 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006889 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6890 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6891 "Missing encoding in text declaration\n");
6892 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006893
6894 SKIP_BLANKS;
6895 if ((RAW == '?') && (NXT(1) == '>')) {
6896 SKIP(2);
6897 } else if (RAW == '>') {
6898 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006899 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006900 NEXT;
6901 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006902 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006903 MOVETO_ENDTAG(CUR_PTR);
6904 NEXT;
6905 }
6906}
6907
6908/**
Owen Taylor3473f882001-02-23 17:55:21 +00006909 * xmlParseExternalSubset:
6910 * @ctxt: an XML parser context
6911 * @ExternalID: the external identifier
6912 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006913 *
Owen Taylor3473f882001-02-23 17:55:21 +00006914 * parse Markup declarations from an external subset
6915 *
6916 * [30] extSubset ::= textDecl? extSubsetDecl
6917 *
6918 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6919 */
6920void
6921xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6922 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006923 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006924 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006925
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006926 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006927 (ctxt->input->end - ctxt->input->cur >= 4)) {
6928 xmlChar start[4];
6929 xmlCharEncoding enc;
6930
6931 start[0] = RAW;
6932 start[1] = NXT(1);
6933 start[2] = NXT(2);
6934 start[3] = NXT(3);
6935 enc = xmlDetectCharEncoding(start, 4);
6936 if (enc != XML_CHAR_ENCODING_NONE)
6937 xmlSwitchEncoding(ctxt, enc);
6938 }
6939
Daniel Veillarda07050d2003-10-19 14:46:32 +00006940 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006941 xmlParseTextDecl(ctxt);
6942 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6943 /*
6944 * The XML REC instructs us to stop parsing right here
6945 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08006946 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006947 return;
6948 }
6949 }
6950 if (ctxt->myDoc == NULL) {
6951 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006952 if (ctxt->myDoc == NULL) {
6953 xmlErrMemory(ctxt, "New Doc failed");
6954 return;
6955 }
6956 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006957 }
6958 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6959 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6960
6961 ctxt->instate = XML_PARSER_DTD;
6962 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006963 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006964 while (((RAW == '<') && (NXT(1) == '?')) ||
6965 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006966 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006967 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006968 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006969
6970 GROW;
6971 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6972 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006973 } else
6974 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006975 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006976
Daniel Veillardfdc91562002-07-01 21:52:03 +00006977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006978 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006979 break;
6980 }
6981 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006982
Owen Taylor3473f882001-02-23 17:55:21 +00006983 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006984 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006985 }
6986
6987}
6988
6989/**
6990 * xmlParseReference:
6991 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006992 *
Owen Taylor3473f882001-02-23 17:55:21 +00006993 * parse and handle entity references in content, depending on the SAX
6994 * interface, this may end-up in a call to character() if this is a
6995 * CharRef, a predefined entity, if there is no reference() callback.
6996 * or if the parser was asked to switch to that mode.
6997 *
6998 * [67] Reference ::= EntityRef | CharRef
6999 */
7000void
7001xmlParseReference(xmlParserCtxtPtr ctxt) {
7002 xmlEntityPtr ent;
7003 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007004 int was_checked;
7005 xmlNodePtr list = NULL;
7006 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007007
Daniel Veillard0161e632008-08-28 15:36:32 +00007008
7009 if (RAW != '&')
7010 return;
7011
7012 /*
7013 * Simple case of a CharRef
7014 */
Owen Taylor3473f882001-02-23 17:55:21 +00007015 if (NXT(1) == '#') {
7016 int i = 0;
7017 xmlChar out[10];
7018 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007019 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007020
Daniel Veillarddc171602008-03-26 17:41:38 +00007021 if (value == 0)
7022 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007023 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7024 /*
7025 * So we are using non-UTF-8 buffers
7026 * Check that the char fit on 8bits, if not
7027 * generate a CharRef.
7028 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007029 if (value <= 0xFF) {
7030 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007031 out[1] = 0;
7032 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7033 (!ctxt->disableSAX))
7034 ctxt->sax->characters(ctxt->userData, out, 1);
7035 } else {
7036 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007037 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007038 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007039 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007040 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7041 (!ctxt->disableSAX))
7042 ctxt->sax->reference(ctxt->userData, out);
7043 }
7044 } else {
7045 /*
7046 * Just encode the value in UTF-8
7047 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007048 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007049 out[i] = 0;
7050 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7051 (!ctxt->disableSAX))
7052 ctxt->sax->characters(ctxt->userData, out, i);
7053 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007054 return;
7055 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007056
Daniel Veillard0161e632008-08-28 15:36:32 +00007057 /*
7058 * We are seeing an entity reference
7059 */
7060 ent = xmlParseEntityRef(ctxt);
7061 if (ent == NULL) return;
7062 if (!ctxt->wellFormed)
7063 return;
7064 was_checked = ent->checked;
7065
7066 /* special case of predefined entities */
7067 if ((ent->name == NULL) ||
7068 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7069 val = ent->content;
7070 if (val == NULL) return;
7071 /*
7072 * inline the entity.
7073 */
7074 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7075 (!ctxt->disableSAX))
7076 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7077 return;
7078 }
7079
7080 /*
7081 * The first reference to the entity trigger a parsing phase
7082 * where the ent->children is filled with the result from
7083 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007084 * Note: external parsed entities will not be loaded, it is not
7085 * required for a non-validating parser, unless the parsing option
7086 * of validating, or substituting entities were given. Doing so is
7087 * far more secure as the parser will only process data coming from
7088 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007089 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007090 if (((ent->checked == 0) ||
7091 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007092 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7093 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007094 unsigned long oldnbent = ctxt->nbentities;
7095
7096 /*
7097 * This is a bit hackish but this seems the best
7098 * way to make sure both SAX and DOM entity support
7099 * behaves okay.
7100 */
7101 void *user_data;
7102 if (ctxt->userData == ctxt)
7103 user_data = NULL;
7104 else
7105 user_data = ctxt->userData;
7106
7107 /*
7108 * Check that this entity is well formed
7109 * 4.3.2: An internal general parsed entity is well-formed
7110 * if its replacement text matches the production labeled
7111 * content.
7112 */
7113 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7114 ctxt->depth++;
7115 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7116 user_data, &list);
7117 ctxt->depth--;
7118
7119 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7120 ctxt->depth++;
7121 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7122 user_data, ctxt->depth, ent->URI,
7123 ent->ExternalID, &list);
7124 ctxt->depth--;
7125 } else {
7126 ret = XML_ERR_ENTITY_PE_INTERNAL;
7127 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7128 "invalid entity type found\n", NULL);
7129 }
7130
7131 /*
7132 * Store the number of entities needing parsing for this entity
7133 * content and do checkings
7134 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007135 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7136 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7137 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007138 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007139 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007140 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007141 return;
7142 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007143 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007144 xmlFreeNodeList(list);
7145 return;
7146 }
Owen Taylor3473f882001-02-23 17:55:21 +00007147
Daniel Veillard0161e632008-08-28 15:36:32 +00007148 if ((ret == XML_ERR_OK) && (list != NULL)) {
7149 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7150 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7151 (ent->children == NULL)) {
7152 ent->children = list;
7153 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007154 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007155 * Prune it directly in the generated document
7156 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007157 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007158 if (((list->type == XML_TEXT_NODE) &&
7159 (list->next == NULL)) ||
7160 (ctxt->parseMode == XML_PARSE_READER)) {
7161 list->parent = (xmlNodePtr) ent;
7162 list = NULL;
7163 ent->owner = 1;
7164 } else {
7165 ent->owner = 0;
7166 while (list != NULL) {
7167 list->parent = (xmlNodePtr) ctxt->node;
7168 list->doc = ctxt->myDoc;
7169 if (list->next == NULL)
7170 ent->last = list;
7171 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007172 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007173 list = ent->children;
7174#ifdef LIBXML_LEGACY_ENABLED
7175 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7176 xmlAddEntityReference(ent, list, NULL);
7177#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007178 }
7179 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007180 ent->owner = 1;
7181 while (list != NULL) {
7182 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007183 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007184 if (list->next == NULL)
7185 ent->last = list;
7186 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007187 }
7188 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007189 } else {
7190 xmlFreeNodeList(list);
7191 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007192 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007193 } else if ((ret != XML_ERR_OK) &&
7194 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7195 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7196 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007197 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007198 } else if (list != NULL) {
7199 xmlFreeNodeList(list);
7200 list = NULL;
7201 }
7202 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007203 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007204
7205 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7206 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007207 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007208 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007209 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007210
Daniel Veillard0161e632008-08-28 15:36:32 +00007211 /*
7212 * Now that the entity content has been gathered
7213 * provide it to the application, this can take different forms based
7214 * on the parsing modes.
7215 */
7216 if (ent->children == NULL) {
7217 /*
7218 * Probably running in SAX mode and the callbacks don't
7219 * build the entity content. So unless we already went
7220 * though parsing for first checking go though the entity
7221 * content to generate callbacks associated to the entity
7222 */
7223 if (was_checked != 0) {
7224 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007225 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007226 * This is a bit hackish but this seems the best
7227 * way to make sure both SAX and DOM entity support
7228 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007229 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007230 if (ctxt->userData == ctxt)
7231 user_data = NULL;
7232 else
7233 user_data = ctxt->userData;
7234
7235 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7236 ctxt->depth++;
7237 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7238 ent->content, user_data, NULL);
7239 ctxt->depth--;
7240 } else if (ent->etype ==
7241 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7242 ctxt->depth++;
7243 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7244 ctxt->sax, user_data, ctxt->depth,
7245 ent->URI, ent->ExternalID, NULL);
7246 ctxt->depth--;
7247 } else {
7248 ret = XML_ERR_ENTITY_PE_INTERNAL;
7249 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7250 "invalid entity type found\n", NULL);
7251 }
7252 if (ret == XML_ERR_ENTITY_LOOP) {
7253 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7254 return;
7255 }
7256 }
7257 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7258 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7259 /*
7260 * Entity reference callback comes second, it's somewhat
7261 * superfluous but a compatibility to historical behaviour
7262 */
7263 ctxt->sax->reference(ctxt->userData, ent->name);
7264 }
7265 return;
7266 }
7267
7268 /*
7269 * If we didn't get any children for the entity being built
7270 */
7271 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7272 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7273 /*
7274 * Create a node.
7275 */
7276 ctxt->sax->reference(ctxt->userData, ent->name);
7277 return;
7278 }
7279
7280 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7281 /*
7282 * There is a problem on the handling of _private for entities
7283 * (bug 155816): Should we copy the content of the field from
7284 * the entity (possibly overwriting some value set by the user
7285 * when a copy is created), should we leave it alone, or should
7286 * we try to take care of different situations? The problem
7287 * is exacerbated by the usage of this field by the xmlReader.
7288 * To fix this bug, we look at _private on the created node
7289 * and, if it's NULL, we copy in whatever was in the entity.
7290 * If it's not NULL we leave it alone. This is somewhat of a
7291 * hack - maybe we should have further tests to determine
7292 * what to do.
7293 */
7294 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7295 /*
7296 * Seems we are generating the DOM content, do
7297 * a simple tree copy for all references except the first
7298 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007299 */
7300 if (((list == NULL) && (ent->owner == 0)) ||
7301 (ctxt->parseMode == XML_PARSE_READER)) {
7302 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7303
7304 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007305 * We are copying here, make sure there is no abuse
7306 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007307 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007308 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7309 return;
7310
7311 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007312 * when operating on a reader, the entities definitions
7313 * are always owning the entities subtree.
7314 if (ctxt->parseMode == XML_PARSE_READER)
7315 ent->owner = 1;
7316 */
7317
7318 cur = ent->children;
7319 while (cur != NULL) {
7320 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7321 if (nw != NULL) {
7322 if (nw->_private == NULL)
7323 nw->_private = cur->_private;
7324 if (firstChild == NULL){
7325 firstChild = nw;
7326 }
7327 nw = xmlAddChild(ctxt->node, nw);
7328 }
7329 if (cur == ent->last) {
7330 /*
7331 * needed to detect some strange empty
7332 * node cases in the reader tests
7333 */
7334 if ((ctxt->parseMode == XML_PARSE_READER) &&
7335 (nw != NULL) &&
7336 (nw->type == XML_ELEMENT_NODE) &&
7337 (nw->children == NULL))
7338 nw->extra = 1;
7339
7340 break;
7341 }
7342 cur = cur->next;
7343 }
7344#ifdef LIBXML_LEGACY_ENABLED
7345 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7346 xmlAddEntityReference(ent, firstChild, nw);
7347#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007348 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007349 xmlNodePtr nw = NULL, cur, next, last,
7350 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007351
7352 /*
7353 * We are copying here, make sure there is no abuse
7354 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007355 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007356 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7357 return;
7358
Daniel Veillard0161e632008-08-28 15:36:32 +00007359 /*
7360 * Copy the entity child list and make it the new
7361 * entity child list. The goal is to make sure any
7362 * ID or REF referenced will be the one from the
7363 * document content and not the entity copy.
7364 */
7365 cur = ent->children;
7366 ent->children = NULL;
7367 last = ent->last;
7368 ent->last = NULL;
7369 while (cur != NULL) {
7370 next = cur->next;
7371 cur->next = NULL;
7372 cur->parent = NULL;
7373 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7374 if (nw != NULL) {
7375 if (nw->_private == NULL)
7376 nw->_private = cur->_private;
7377 if (firstChild == NULL){
7378 firstChild = cur;
7379 }
7380 xmlAddChild((xmlNodePtr) ent, nw);
7381 xmlAddChild(ctxt->node, cur);
7382 }
7383 if (cur == last)
7384 break;
7385 cur = next;
7386 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007387 if (ent->owner == 0)
7388 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007389#ifdef LIBXML_LEGACY_ENABLED
7390 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7391 xmlAddEntityReference(ent, firstChild, nw);
7392#endif /* LIBXML_LEGACY_ENABLED */
7393 } else {
7394 const xmlChar *nbktext;
7395
7396 /*
7397 * the name change is to avoid coalescing of the
7398 * node with a possible previous text one which
7399 * would make ent->children a dangling pointer
7400 */
7401 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7402 -1);
7403 if (ent->children->type == XML_TEXT_NODE)
7404 ent->children->name = nbktext;
7405 if ((ent->last != ent->children) &&
7406 (ent->last->type == XML_TEXT_NODE))
7407 ent->last->name = nbktext;
7408 xmlAddChildList(ctxt->node, ent->children);
7409 }
7410
7411 /*
7412 * This is to avoid a nasty side effect, see
7413 * characters() in SAX.c
7414 */
7415 ctxt->nodemem = 0;
7416 ctxt->nodelen = 0;
7417 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007418 }
7419 }
7420}
7421
7422/**
7423 * xmlParseEntityRef:
7424 * @ctxt: an XML parser context
7425 *
7426 * parse ENTITY references declarations
7427 *
7428 * [68] EntityRef ::= '&' Name ';'
7429 *
7430 * [ WFC: Entity Declared ]
7431 * In a document without any DTD, a document with only an internal DTD
7432 * subset which contains no parameter entity references, or a document
7433 * with "standalone='yes'", the Name given in the entity reference
7434 * must match that in an entity declaration, except that well-formed
7435 * documents need not declare any of the following entities: amp, lt,
7436 * gt, apos, quot. The declaration of a parameter entity must precede
7437 * any reference to it. Similarly, the declaration of a general entity
7438 * must precede any reference to it which appears in a default value in an
7439 * attribute-list declaration. Note that if entities are declared in the
7440 * external subset or in external parameter entities, a non-validating
7441 * processor is not obligated to read and process their declarations;
7442 * for such documents, the rule that an entity must be declared is a
7443 * well-formedness constraint only if standalone='yes'.
7444 *
7445 * [ WFC: Parsed Entity ]
7446 * An entity reference must not contain the name of an unparsed entity
7447 *
7448 * Returns the xmlEntityPtr if found, or NULL otherwise.
7449 */
7450xmlEntityPtr
7451xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007452 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007453 xmlEntityPtr ent = NULL;
7454
7455 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007456 if (ctxt->instate == XML_PARSER_EOF)
7457 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007458
Daniel Veillard0161e632008-08-28 15:36:32 +00007459 if (RAW != '&')
7460 return(NULL);
7461 NEXT;
7462 name = xmlParseName(ctxt);
7463 if (name == NULL) {
7464 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7465 "xmlParseEntityRef: no name\n");
7466 return(NULL);
7467 }
7468 if (RAW != ';') {
7469 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7470 return(NULL);
7471 }
7472 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007473
Daniel Veillard0161e632008-08-28 15:36:32 +00007474 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007475 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007476 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007477 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7478 ent = xmlGetPredefinedEntity(name);
7479 if (ent != NULL)
7480 return(ent);
7481 }
Owen Taylor3473f882001-02-23 17:55:21 +00007482
Daniel Veillard0161e632008-08-28 15:36:32 +00007483 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007484 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007485 */
7486 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007487
Daniel Veillard0161e632008-08-28 15:36:32 +00007488 /*
7489 * Ask first SAX for entity resolution, otherwise try the
7490 * entities which may have stored in the parser context.
7491 */
7492 if (ctxt->sax != NULL) {
7493 if (ctxt->sax->getEntity != NULL)
7494 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007496 (ctxt->options & XML_PARSE_OLDSAX))
7497 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007498 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7499 (ctxt->userData==ctxt)) {
7500 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007501 }
7502 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007503 if (ctxt->instate == XML_PARSER_EOF)
7504 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007505 /*
7506 * [ WFC: Entity Declared ]
7507 * In a document without any DTD, a document with only an
7508 * internal DTD subset which contains no parameter entity
7509 * references, or a document with "standalone='yes'", the
7510 * Name given in the entity reference must match that in an
7511 * entity declaration, except that well-formed documents
7512 * need not declare any of the following entities: amp, lt,
7513 * gt, apos, quot.
7514 * The declaration of a parameter entity must precede any
7515 * reference to it.
7516 * Similarly, the declaration of a general entity must
7517 * precede any reference to it which appears in a default
7518 * value in an attribute-list declaration. Note that if
7519 * entities are declared in the external subset or in
7520 * external parameter entities, a non-validating processor
7521 * is not obligated to read and process their declarations;
7522 * for such documents, the rule that an entity must be
7523 * declared is a well-formedness constraint only if
7524 * standalone='yes'.
7525 */
7526 if (ent == NULL) {
7527 if ((ctxt->standalone == 1) ||
7528 ((ctxt->hasExternalSubset == 0) &&
7529 (ctxt->hasPErefs == 0))) {
7530 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7531 "Entity '%s' not defined\n", name);
7532 } else {
7533 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7534 "Entity '%s' not defined\n", name);
7535 if ((ctxt->inSubset == 0) &&
7536 (ctxt->sax != NULL) &&
7537 (ctxt->sax->reference != NULL)) {
7538 ctxt->sax->reference(ctxt->userData, name);
7539 }
7540 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007541 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007542 ctxt->valid = 0;
7543 }
7544
7545 /*
7546 * [ WFC: Parsed Entity ]
7547 * An entity reference must not contain the name of an
7548 * unparsed entity
7549 */
7550 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7551 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7552 "Entity reference to unparsed entity %s\n", name);
7553 }
7554
7555 /*
7556 * [ WFC: No External Entity References ]
7557 * Attribute values cannot contain direct or indirect
7558 * entity references to external entities.
7559 */
7560 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7562 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7563 "Attribute references external entity '%s'\n", name);
7564 }
7565 /*
7566 * [ WFC: No < in Attribute Values ]
7567 * The replacement text of any entity referred to directly or
7568 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007569 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007570 */
7571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007572 (ent != NULL) &&
7573 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007574 if (((ent->checked & 1) || (ent->checked == 0)) &&
7575 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007576 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7577 "'<' in entity '%s' is not allowed in attributes values\n", name);
7578 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007579 }
7580
7581 /*
7582 * Internal check, no parameter entities here ...
7583 */
7584 else {
7585 switch (ent->etype) {
7586 case XML_INTERNAL_PARAMETER_ENTITY:
7587 case XML_EXTERNAL_PARAMETER_ENTITY:
7588 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7589 "Attempt to reference the parameter entity '%s'\n",
7590 name);
7591 break;
7592 default:
7593 break;
7594 }
7595 }
7596
7597 /*
7598 * [ WFC: No Recursion ]
7599 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007600 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007601 * Done somewhere else
7602 */
Owen Taylor3473f882001-02-23 17:55:21 +00007603 return(ent);
7604}
7605
7606/**
7607 * xmlParseStringEntityRef:
7608 * @ctxt: an XML parser context
7609 * @str: a pointer to an index in the string
7610 *
7611 * parse ENTITY references declarations, but this version parses it from
7612 * a string value.
7613 *
7614 * [68] EntityRef ::= '&' Name ';'
7615 *
7616 * [ WFC: Entity Declared ]
7617 * In a document without any DTD, a document with only an internal DTD
7618 * subset which contains no parameter entity references, or a document
7619 * with "standalone='yes'", the Name given in the entity reference
7620 * must match that in an entity declaration, except that well-formed
7621 * documents need not declare any of the following entities: amp, lt,
7622 * gt, apos, quot. The declaration of a parameter entity must precede
7623 * any reference to it. Similarly, the declaration of a general entity
7624 * must precede any reference to it which appears in a default value in an
7625 * attribute-list declaration. Note that if entities are declared in the
7626 * external subset or in external parameter entities, a non-validating
7627 * processor is not obligated to read and process their declarations;
7628 * for such documents, the rule that an entity must be declared is a
7629 * well-formedness constraint only if standalone='yes'.
7630 *
7631 * [ WFC: Parsed Entity ]
7632 * An entity reference must not contain the name of an unparsed entity
7633 *
7634 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7635 * is updated to the current location in the string.
7636 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007637static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007638xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7639 xmlChar *name;
7640 const xmlChar *ptr;
7641 xmlChar cur;
7642 xmlEntityPtr ent = NULL;
7643
7644 if ((str == NULL) || (*str == NULL))
7645 return(NULL);
7646 ptr = *str;
7647 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007648 if (cur != '&')
7649 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007650
Daniel Veillard0161e632008-08-28 15:36:32 +00007651 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007652 name = xmlParseStringName(ctxt, &ptr);
7653 if (name == NULL) {
7654 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7655 "xmlParseStringEntityRef: no name\n");
7656 *str = ptr;
7657 return(NULL);
7658 }
7659 if (*ptr != ';') {
7660 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007661 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007662 *str = ptr;
7663 return(NULL);
7664 }
7665 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007666
Owen Taylor3473f882001-02-23 17:55:21 +00007667
Daniel Veillard0161e632008-08-28 15:36:32 +00007668 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007669 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007670 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007671 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7672 ent = xmlGetPredefinedEntity(name);
7673 if (ent != NULL) {
7674 xmlFree(name);
7675 *str = ptr;
7676 return(ent);
7677 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007678 }
Owen Taylor3473f882001-02-23 17:55:21 +00007679
Daniel Veillard0161e632008-08-28 15:36:32 +00007680 /*
7681 * Increate the number of entity references parsed
7682 */
7683 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007684
Daniel Veillard0161e632008-08-28 15:36:32 +00007685 /*
7686 * Ask first SAX for entity resolution, otherwise try the
7687 * entities which may have stored in the parser context.
7688 */
7689 if (ctxt->sax != NULL) {
7690 if (ctxt->sax->getEntity != NULL)
7691 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007692 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7693 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007694 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7695 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007696 }
7697 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007698 if (ctxt->instate == XML_PARSER_EOF) {
7699 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007700 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007701 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007702
7703 /*
7704 * [ WFC: Entity Declared ]
7705 * In a document without any DTD, a document with only an
7706 * internal DTD subset which contains no parameter entity
7707 * references, or a document with "standalone='yes'", the
7708 * Name given in the entity reference must match that in an
7709 * entity declaration, except that well-formed documents
7710 * need not declare any of the following entities: amp, lt,
7711 * gt, apos, quot.
7712 * The declaration of a parameter entity must precede any
7713 * reference to it.
7714 * Similarly, the declaration of a general entity must
7715 * precede any reference to it which appears in a default
7716 * value in an attribute-list declaration. Note that if
7717 * entities are declared in the external subset or in
7718 * external parameter entities, a non-validating processor
7719 * is not obligated to read and process their declarations;
7720 * for such documents, the rule that an entity must be
7721 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007722 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007723 */
7724 if (ent == NULL) {
7725 if ((ctxt->standalone == 1) ||
7726 ((ctxt->hasExternalSubset == 0) &&
7727 (ctxt->hasPErefs == 0))) {
7728 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7729 "Entity '%s' not defined\n", name);
7730 } else {
7731 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7732 "Entity '%s' not defined\n",
7733 name);
7734 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007735 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007736 /* TODO ? check regressions ctxt->valid = 0; */
7737 }
7738
7739 /*
7740 * [ WFC: Parsed Entity ]
7741 * An entity reference must not contain the name of an
7742 * unparsed entity
7743 */
7744 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7745 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7746 "Entity reference to unparsed entity %s\n", name);
7747 }
7748
7749 /*
7750 * [ WFC: No External Entity References ]
7751 * Attribute values cannot contain direct or indirect
7752 * entity references to external entities.
7753 */
7754 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7755 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7756 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7757 "Attribute references external entity '%s'\n", name);
7758 }
7759 /*
7760 * [ WFC: No < in Attribute Values ]
7761 * The replacement text of any entity referred to directly or
7762 * indirectly in an attribute value (other than "&lt;") must
7763 * not contain a <.
7764 */
7765 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7766 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007767 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007768 (xmlStrchr(ent->content, '<'))) {
7769 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7770 "'<' in entity '%s' is not allowed in attributes values\n",
7771 name);
7772 }
7773
7774 /*
7775 * Internal check, no parameter entities here ...
7776 */
7777 else {
7778 switch (ent->etype) {
7779 case XML_INTERNAL_PARAMETER_ENTITY:
7780 case XML_EXTERNAL_PARAMETER_ENTITY:
7781 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7782 "Attempt to reference the parameter entity '%s'\n",
7783 name);
7784 break;
7785 default:
7786 break;
7787 }
7788 }
7789
7790 /*
7791 * [ WFC: No Recursion ]
7792 * A parsed entity must not contain a recursive reference
7793 * to itself, either directly or indirectly.
7794 * Done somewhere else
7795 */
7796
7797 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007798 *str = ptr;
7799 return(ent);
7800}
7801
7802/**
7803 * xmlParsePEReference:
7804 * @ctxt: an XML parser context
7805 *
7806 * parse PEReference declarations
7807 * The entity content is handled directly by pushing it's content as
7808 * a new input stream.
7809 *
7810 * [69] PEReference ::= '%' Name ';'
7811 *
7812 * [ WFC: No Recursion ]
7813 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007814 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007815 *
7816 * [ WFC: Entity Declared ]
7817 * In a document without any DTD, a document with only an internal DTD
7818 * subset which contains no parameter entity references, or a document
7819 * with "standalone='yes'", ... ... The declaration of a parameter
7820 * entity must precede any reference to it...
7821 *
7822 * [ VC: Entity Declared ]
7823 * In a document with an external subset or external parameter entities
7824 * with "standalone='no'", ... ... The declaration of a parameter entity
7825 * must precede any reference to it...
7826 *
7827 * [ WFC: In DTD ]
7828 * Parameter-entity references may only appear in the DTD.
7829 * NOTE: misleading but this is handled.
7830 */
7831void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007832xmlParsePEReference(xmlParserCtxtPtr ctxt)
7833{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007834 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007835 xmlEntityPtr entity = NULL;
7836 xmlParserInputPtr input;
7837
Daniel Veillard0161e632008-08-28 15:36:32 +00007838 if (RAW != '%')
7839 return;
7840 NEXT;
7841 name = xmlParseName(ctxt);
7842 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007843 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007844 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007845 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007846 if (xmlParserDebugEntities)
7847 xmlGenericError(xmlGenericErrorContext,
7848 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007849 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007850 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007851 return;
7852 }
7853
7854 NEXT;
7855
7856 /*
7857 * Increate the number of entity references parsed
7858 */
7859 ctxt->nbentities++;
7860
7861 /*
7862 * Request the entity from SAX
7863 */
7864 if ((ctxt->sax != NULL) &&
7865 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007866 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7867 if (ctxt->instate == XML_PARSER_EOF)
7868 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007869 if (entity == NULL) {
7870 /*
7871 * [ WFC: Entity Declared ]
7872 * In a document without any DTD, a document with only an
7873 * internal DTD subset which contains no parameter entity
7874 * references, or a document with "standalone='yes'", ...
7875 * ... The declaration of a parameter entity must precede
7876 * any reference to it...
7877 */
7878 if ((ctxt->standalone == 1) ||
7879 ((ctxt->hasExternalSubset == 0) &&
7880 (ctxt->hasPErefs == 0))) {
7881 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7882 "PEReference: %%%s; not found\n",
7883 name);
7884 } else {
7885 /*
7886 * [ VC: Entity Declared ]
7887 * In a document with an external subset or external
7888 * parameter entities with "standalone='no'", ...
7889 * ... The declaration of a parameter entity must
7890 * precede any reference to it...
7891 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007892 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7893 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7894 "PEReference: %%%s; not found\n",
7895 name, NULL);
7896 } else
7897 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7898 "PEReference: %%%s; not found\n",
7899 name, NULL);
7900 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007901 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007902 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007903 } else {
7904 /*
7905 * Internal checking in case the entity quest barfed
7906 */
7907 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910 "Internal: %%%s; is not a parameter entity\n",
7911 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007912 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007913 xmlChar start[4];
7914 xmlCharEncoding enc;
7915
Neel Mehta90ccb582017-04-07 17:43:02 +02007916 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7917 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7918 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7919 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7920 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7921 (ctxt->replaceEntities == 0) &&
7922 (ctxt->validate == 0))
7923 return;
7924
Daniel Veillard0161e632008-08-28 15:36:32 +00007925 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007926 if (xmlPushInput(ctxt, input) < 0) {
7927 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00007928 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007929 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02007930
7931 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7932 /*
7933 * Get the 4 first bytes and decode the charset
7934 * if enc != XML_CHAR_ENCODING_NONE
7935 * plug some encoding conversion routines.
7936 * Note that, since we may have some non-UTF8
7937 * encoding (like UTF16, bug 135229), the 'length'
7938 * is not known, but we can calculate based upon
7939 * the amount of data in the buffer.
7940 */
7941 GROW
7942 if (ctxt->instate == XML_PARSER_EOF)
7943 return;
7944 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7945 start[0] = RAW;
7946 start[1] = NXT(1);
7947 start[2] = NXT(2);
7948 start[3] = NXT(3);
7949 enc = xmlDetectCharEncoding(start, 4);
7950 if (enc != XML_CHAR_ENCODING_NONE) {
7951 xmlSwitchEncoding(ctxt, enc);
7952 }
7953 }
7954
7955 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7956 (IS_BLANK_CH(NXT(5)))) {
7957 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02007958 }
7959 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007960 }
7961 }
7962 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007963}
7964
7965/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007966 * xmlLoadEntityContent:
7967 * @ctxt: an XML parser context
7968 * @entity: an unloaded system entity
7969 *
7970 * Load the original content of the given system entity from the
7971 * ExternalID/SystemID given. This is to be used for Included in Literal
7972 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7973 *
7974 * Returns 0 in case of success and -1 in case of failure
7975 */
7976static int
7977xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7978 xmlParserInputPtr input;
7979 xmlBufferPtr buf;
7980 int l, c;
7981 int count = 0;
7982
7983 if ((ctxt == NULL) || (entity == NULL) ||
7984 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7985 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7986 (entity->content != NULL)) {
7987 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7988 "xmlLoadEntityContent parameter error");
7989 return(-1);
7990 }
7991
7992 if (xmlParserDebugEntities)
7993 xmlGenericError(xmlGenericErrorContext,
7994 "Reading %s entity content input\n", entity->name);
7995
7996 buf = xmlBufferCreate();
7997 if (buf == NULL) {
7998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7999 "xmlLoadEntityContent parameter error");
8000 return(-1);
8001 }
8002
8003 input = xmlNewEntityInputStream(ctxt, entity);
8004 if (input == NULL) {
8005 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8006 "xmlLoadEntityContent input error");
8007 xmlBufferFree(buf);
8008 return(-1);
8009 }
8010
8011 /*
8012 * Push the entity as the current input, read char by char
8013 * saving to the buffer until the end of the entity or an error
8014 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008015 if (xmlPushInput(ctxt, input) < 0) {
8016 xmlBufferFree(buf);
8017 return(-1);
8018 }
8019
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008020 GROW;
8021 c = CUR_CHAR(l);
8022 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8023 (IS_CHAR(c))) {
8024 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008025 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008026 count = 0;
8027 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008028 if (ctxt->instate == XML_PARSER_EOF) {
8029 xmlBufferFree(buf);
8030 return(-1);
8031 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008032 }
8033 NEXTL(l);
8034 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008035 if (c == 0) {
8036 count = 0;
8037 GROW;
8038 if (ctxt->instate == XML_PARSER_EOF) {
8039 xmlBufferFree(buf);
8040 return(-1);
8041 }
8042 c = CUR_CHAR(l);
8043 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008044 }
8045
8046 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8047 xmlPopInput(ctxt);
8048 } else if (!IS_CHAR(c)) {
8049 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8050 "xmlLoadEntityContent: invalid char value %d\n",
8051 c);
8052 xmlBufferFree(buf);
8053 return(-1);
8054 }
8055 entity->content = buf->content;
8056 buf->content = NULL;
8057 xmlBufferFree(buf);
8058
8059 return(0);
8060}
8061
8062/**
Owen Taylor3473f882001-02-23 17:55:21 +00008063 * xmlParseStringPEReference:
8064 * @ctxt: an XML parser context
8065 * @str: a pointer to an index in the string
8066 *
8067 * parse PEReference declarations
8068 *
8069 * [69] PEReference ::= '%' Name ';'
8070 *
8071 * [ WFC: No Recursion ]
8072 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008073 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008074 *
8075 * [ WFC: Entity Declared ]
8076 * In a document without any DTD, a document with only an internal DTD
8077 * subset which contains no parameter entity references, or a document
8078 * with "standalone='yes'", ... ... The declaration of a parameter
8079 * entity must precede any reference to it...
8080 *
8081 * [ VC: Entity Declared ]
8082 * In a document with an external subset or external parameter entities
8083 * with "standalone='no'", ... ... The declaration of a parameter entity
8084 * must precede any reference to it...
8085 *
8086 * [ WFC: In DTD ]
8087 * Parameter-entity references may only appear in the DTD.
8088 * NOTE: misleading but this is handled.
8089 *
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8091 * str is updated to the current value of the index
8092 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008093static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008094xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8095 const xmlChar *ptr;
8096 xmlChar cur;
8097 xmlChar *name;
8098 xmlEntityPtr entity = NULL;
8099
8100 if ((str == NULL) || (*str == NULL)) return(NULL);
8101 ptr = *str;
8102 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008103 if (cur != '%')
8104 return(NULL);
8105 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008106 name = xmlParseStringName(ctxt, &ptr);
8107 if (name == NULL) {
8108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8109 "xmlParseStringPEReference: no name\n");
8110 *str = ptr;
8111 return(NULL);
8112 }
8113 cur = *ptr;
8114 if (cur != ';') {
8115 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8116 xmlFree(name);
8117 *str = ptr;
8118 return(NULL);
8119 }
8120 ptr++;
8121
8122 /*
8123 * Increate the number of entity references parsed
8124 */
8125 ctxt->nbentities++;
8126
8127 /*
8128 * Request the entity from SAX
8129 */
8130 if ((ctxt->sax != NULL) &&
8131 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008132 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8133 if (ctxt->instate == XML_PARSER_EOF) {
8134 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008135 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008136 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008137 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008138 if (entity == NULL) {
8139 /*
8140 * [ WFC: Entity Declared ]
8141 * In a document without any DTD, a document with only an
8142 * internal DTD subset which contains no parameter entity
8143 * references, or a document with "standalone='yes'", ...
8144 * ... The declaration of a parameter entity must precede
8145 * any reference to it...
8146 */
8147 if ((ctxt->standalone == 1) ||
8148 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8149 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8150 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008151 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008152 /*
8153 * [ VC: Entity Declared ]
8154 * In a document with an external subset or external
8155 * parameter entities with "standalone='no'", ...
8156 * ... The declaration of a parameter entity must
8157 * precede any reference to it...
8158 */
8159 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8160 "PEReference: %%%s; not found\n",
8161 name, NULL);
8162 ctxt->valid = 0;
8163 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008164 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008165 } else {
8166 /*
8167 * Internal checking in case the entity quest barfed
8168 */
8169 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8170 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8171 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8172 "%%%s; is not a parameter entity\n",
8173 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008174 }
8175 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008176 ctxt->hasPErefs = 1;
8177 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008178 *str = ptr;
8179 return(entity);
8180}
8181
8182/**
8183 * xmlParseDocTypeDecl:
8184 * @ctxt: an XML parser context
8185 *
8186 * parse a DOCTYPE declaration
8187 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008188 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008189 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8190 *
8191 * [ VC: Root Element Type ]
8192 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008193 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008194 */
8195
8196void
8197xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008198 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008199 xmlChar *ExternalID = NULL;
8200 xmlChar *URI = NULL;
8201
8202 /*
8203 * We know that '<!DOCTYPE' has been detected.
8204 */
8205 SKIP(9);
8206
8207 SKIP_BLANKS;
8208
8209 /*
8210 * Parse the DOCTYPE name.
8211 */
8212 name = xmlParseName(ctxt);
8213 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008214 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8215 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008216 }
8217 ctxt->intSubName = name;
8218
8219 SKIP_BLANKS;
8220
8221 /*
8222 * Check for SystemID and ExternalID
8223 */
8224 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8225
8226 if ((URI != NULL) || (ExternalID != NULL)) {
8227 ctxt->hasExternalSubset = 1;
8228 }
8229 ctxt->extSubURI = URI;
8230 ctxt->extSubSystem = ExternalID;
8231
8232 SKIP_BLANKS;
8233
8234 /*
8235 * Create and update the internal subset.
8236 */
8237 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8238 (!ctxt->disableSAX))
8239 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008240 if (ctxt->instate == XML_PARSER_EOF)
8241 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008242
8243 /*
8244 * Is there any internal subset declarations ?
8245 * they are handled separately in xmlParseInternalSubset()
8246 */
8247 if (RAW == '[')
8248 return;
8249
8250 /*
8251 * We should be at the end of the DOCTYPE declaration.
8252 */
8253 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008254 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008255 }
8256 NEXT;
8257}
8258
8259/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008260 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008261 * @ctxt: an XML parser context
8262 *
8263 * parse the internal subset declaration
8264 *
8265 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266 */
8267
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008268static void
Owen Taylor3473f882001-02-23 17:55:21 +00008269xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8270 /*
8271 * Is there any DTD definition ?
8272 */
8273 if (RAW == '[') {
8274 ctxt->instate = XML_PARSER_DTD;
8275 NEXT;
8276 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008277 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008278 * PEReferences.
8279 * Subsequence (markupdecl | PEReference | S)*
8280 */
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008281 while (((RAW != ']') || (ctxt->inputNr > 1)) &&
8282 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008283 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008284 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008285
8286 SKIP_BLANKS;
8287 xmlParseMarkupDecl(ctxt);
8288 xmlParsePEReference(ctxt);
8289
Owen Taylor3473f882001-02-23 17:55:21 +00008290 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008291 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008292 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008293 if (ctxt->inputNr > 1)
8294 xmlPopInput(ctxt);
8295 else
8296 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008297 }
8298 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008299 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008300 NEXT;
8301 SKIP_BLANKS;
8302 }
8303 }
8304
8305 /*
8306 * We should be at the end of the DOCTYPE declaration.
8307 */
8308 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008309 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008310 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008311 }
8312 NEXT;
8313}
8314
Daniel Veillard81273902003-09-30 00:43:48 +00008315#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008316/**
8317 * xmlParseAttribute:
8318 * @ctxt: an XML parser context
8319 * @value: a xmlChar ** used to store the value of the attribute
8320 *
8321 * parse an attribute
8322 *
8323 * [41] Attribute ::= Name Eq AttValue
8324 *
8325 * [ WFC: No External Entity References ]
8326 * Attribute values cannot contain direct or indirect entity references
8327 * to external entities.
8328 *
8329 * [ WFC: No < in Attribute Values ]
8330 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008331 * an attribute value (other than "&lt;") must not contain a <.
8332 *
Owen Taylor3473f882001-02-23 17:55:21 +00008333 * [ VC: Attribute Value Type ]
8334 * The attribute must have been declared; the value must be of the type
8335 * declared for it.
8336 *
8337 * [25] Eq ::= S? '=' S?
8338 *
8339 * With namespace:
8340 *
8341 * [NS 11] Attribute ::= QName Eq AttValue
8342 *
8343 * Also the case QName == xmlns:??? is handled independently as a namespace
8344 * definition.
8345 *
8346 * Returns the attribute name, and the value in *value.
8347 */
8348
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008349const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008350xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008351 const xmlChar *name;
8352 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008353
8354 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008355 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008356 name = xmlParseName(ctxt);
8357 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008358 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008359 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008360 return(NULL);
8361 }
8362
8363 /*
8364 * read the value
8365 */
8366 SKIP_BLANKS;
8367 if (RAW == '=') {
8368 NEXT;
8369 SKIP_BLANKS;
8370 val = xmlParseAttValue(ctxt);
8371 ctxt->instate = XML_PARSER_CONTENT;
8372 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008373 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008374 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008375 return(NULL);
8376 }
8377
8378 /*
8379 * Check that xml:lang conforms to the specification
8380 * No more registered as an error, just generate a warning now
8381 * since this was deprecated in XML second edition
8382 */
8383 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8384 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008385 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8386 "Malformed value for xml:lang : %s\n",
8387 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008388 }
8389 }
8390
8391 /*
8392 * Check that xml:space conforms to the specification
8393 */
8394 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8395 if (xmlStrEqual(val, BAD_CAST "default"))
8396 *(ctxt->space) = 0;
8397 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8398 *(ctxt->space) = 1;
8399 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008400 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008401"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008402 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008403 }
8404 }
8405
8406 *value = val;
8407 return(name);
8408}
8409
8410/**
8411 * xmlParseStartTag:
8412 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008413 *
Owen Taylor3473f882001-02-23 17:55:21 +00008414 * parse a start of tag either for rule element or
8415 * EmptyElement. In both case we don't parse the tag closing chars.
8416 *
8417 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8418 *
8419 * [ WFC: Unique Att Spec ]
8420 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008421 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008422 *
8423 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8424 *
8425 * [ WFC: Unique Att Spec ]
8426 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008427 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008428 *
8429 * With namespace:
8430 *
8431 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8432 *
8433 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8434 *
8435 * Returns the element name parsed
8436 */
8437
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008438const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008439xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008440 const xmlChar *name;
8441 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008442 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008443 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008444 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008445 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008446 int i;
8447
8448 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008449 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008450
8451 name = xmlParseName(ctxt);
8452 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008453 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008454 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008455 return(NULL);
8456 }
8457
8458 /*
8459 * Now parse the attributes, it ends up with the ending
8460 *
8461 * (S Attribute)* S?
8462 */
8463 SKIP_BLANKS;
8464 GROW;
8465
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008466 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008467 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008468 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008469 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008470 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008471
8472 attname = xmlParseAttribute(ctxt, &attvalue);
8473 if ((attname != NULL) && (attvalue != NULL)) {
8474 /*
8475 * [ WFC: Unique Att Spec ]
8476 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008477 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008478 */
8479 for (i = 0; i < nbatts;i += 2) {
8480 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008481 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008482 xmlFree(attvalue);
8483 goto failed;
8484 }
8485 }
Owen Taylor3473f882001-02-23 17:55:21 +00008486 /*
8487 * Add the pair to atts
8488 */
8489 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008490 maxatts = 22; /* allow for 10 attrs by default */
8491 atts = (const xmlChar **)
8492 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008493 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008494 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008495 if (attvalue != NULL)
8496 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008497 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008498 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008499 ctxt->atts = atts;
8500 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008501 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008502 const xmlChar **n;
8503
Owen Taylor3473f882001-02-23 17:55:21 +00008504 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008505 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008506 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008507 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008508 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008509 if (attvalue != NULL)
8510 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008511 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008512 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008513 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008514 ctxt->atts = atts;
8515 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008516 }
8517 atts[nbatts++] = attname;
8518 atts[nbatts++] = attvalue;
8519 atts[nbatts] = NULL;
8520 atts[nbatts + 1] = NULL;
8521 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008522 if (attvalue != NULL)
8523 xmlFree(attvalue);
8524 }
8525
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008526failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008527
Daniel Veillard3772de32002-12-17 10:31:45 +00008528 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008529 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8530 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008531 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008532 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8533 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008534 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008535 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8536 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008537 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8538 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008539 break;
8540 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008541 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008542 GROW;
8543 }
8544
8545 /*
8546 * SAX: Start of Element !
8547 */
8548 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008549 (!ctxt->disableSAX)) {
8550 if (nbatts > 0)
8551 ctxt->sax->startElement(ctxt->userData, name, atts);
8552 else
8553 ctxt->sax->startElement(ctxt->userData, name, NULL);
8554 }
Owen Taylor3473f882001-02-23 17:55:21 +00008555
8556 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008557 /* Free only the content strings */
8558 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008559 if (atts[i] != NULL)
8560 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008561 }
8562 return(name);
8563}
8564
8565/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008567 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008568 * @line: line of the start tag
8569 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008570 *
8571 * parse an end of tag
8572 *
8573 * [42] ETag ::= '</' Name S? '>'
8574 *
8575 * With namespace
8576 *
8577 * [NS 9] ETag ::= '</' QName S? '>'
8578 */
8579
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008580static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008581xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008582 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008583
8584 GROW;
8585 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008586 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008587 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008588 return;
8589 }
8590 SKIP(2);
8591
Daniel Veillard46de64e2002-05-29 08:21:33 +00008592 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008593
8594 /*
8595 * We should definitely be at the ending "S? '>'" part
8596 */
8597 GROW;
8598 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008599 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008600 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008601 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008602 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008603
8604 /*
8605 * [ WFC: Element Type Match ]
8606 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008607 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008608 *
8609 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008610 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008611 if (name == NULL) name = BAD_CAST "unparseable";
8612 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008613 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008614 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008615 }
8616
8617 /*
8618 * SAX: End of Tag
8619 */
8620 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8621 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008622 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008623
Daniel Veillarde57ec792003-09-10 10:50:59 +00008624 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008625 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008626 return;
8627}
8628
8629/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008630 * xmlParseEndTag:
8631 * @ctxt: an XML parser context
8632 *
8633 * parse an end of tag
8634 *
8635 * [42] ETag ::= '</' Name S? '>'
8636 *
8637 * With namespace
8638 *
8639 * [NS 9] ETag ::= '</' QName S? '>'
8640 */
8641
8642void
8643xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008644 xmlParseEndTag1(ctxt, 0);
8645}
Daniel Veillard81273902003-09-30 00:43:48 +00008646#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008647
8648/************************************************************************
8649 * *
8650 * SAX 2 specific operations *
8651 * *
8652 ************************************************************************/
8653
Daniel Veillard0fb18932003-09-07 09:14:37 +00008654/*
8655 * xmlGetNamespace:
8656 * @ctxt: an XML parser context
8657 * @prefix: the prefix to lookup
8658 *
8659 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008660 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008661 *
8662 * Returns the namespace name or NULL if not bound
8663 */
8664static const xmlChar *
8665xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8666 int i;
8667
Daniel Veillarde57ec792003-09-10 10:50:59 +00008668 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008669 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008670 if (ctxt->nsTab[i] == prefix) {
8671 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8672 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008674 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008675 return(NULL);
8676}
8677
8678/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008679 * xmlParseQName:
8680 * @ctxt: an XML parser context
8681 * @prefix: pointer to store the prefix part
8682 *
8683 * parse an XML Namespace QName
8684 *
8685 * [6] QName ::= (Prefix ':')? LocalPart
8686 * [7] Prefix ::= NCName
8687 * [8] LocalPart ::= NCName
8688 *
8689 * Returns the Name parsed or NULL
8690 */
8691
8692static const xmlChar *
8693xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8694 const xmlChar *l, *p;
8695
8696 GROW;
8697
8698 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008699 if (l == NULL) {
8700 if (CUR == ':') {
8701 l = xmlParseName(ctxt);
8702 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008703 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008704 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008705 *prefix = NULL;
8706 return(l);
8707 }
8708 }
8709 return(NULL);
8710 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008711 if (CUR == ':') {
8712 NEXT;
8713 p = l;
8714 l = xmlParseNCName(ctxt);
8715 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008716 xmlChar *tmp;
8717
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008718 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8719 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008720 l = xmlParseNmtoken(ctxt);
8721 if (l == NULL)
8722 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8723 else {
8724 tmp = xmlBuildQName(l, p, NULL, 0);
8725 xmlFree((char *)l);
8726 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008727 p = xmlDictLookup(ctxt->dict, tmp, -1);
8728 if (tmp != NULL) xmlFree(tmp);
8729 *prefix = NULL;
8730 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008731 }
8732 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008733 xmlChar *tmp;
8734
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008735 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8736 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008737 NEXT;
8738 tmp = (xmlChar *) xmlParseName(ctxt);
8739 if (tmp != NULL) {
8740 tmp = xmlBuildQName(tmp, l, NULL, 0);
8741 l = xmlDictLookup(ctxt->dict, tmp, -1);
8742 if (tmp != NULL) xmlFree(tmp);
8743 *prefix = p;
8744 return(l);
8745 }
8746 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8747 l = xmlDictLookup(ctxt->dict, tmp, -1);
8748 if (tmp != NULL) xmlFree(tmp);
8749 *prefix = p;
8750 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008751 }
8752 *prefix = p;
8753 } else
8754 *prefix = NULL;
8755 return(l);
8756}
8757
8758/**
8759 * xmlParseQNameAndCompare:
8760 * @ctxt: an XML parser context
8761 * @name: the localname
8762 * @prefix: the prefix, if any.
8763 *
8764 * parse an XML name and compares for match
8765 * (specialized for endtag parsing)
8766 *
8767 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8768 * and the name for mismatch
8769 */
8770
8771static const xmlChar *
8772xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8773 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008774 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 const xmlChar *in;
8776 const xmlChar *ret;
8777 const xmlChar *prefix2;
8778
8779 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8780
8781 GROW;
8782 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008783
Daniel Veillard0fb18932003-09-07 09:14:37 +00008784 cmp = prefix;
8785 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008786 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008787 ++cmp;
8788 }
8789 if ((*cmp == 0) && (*in == ':')) {
8790 in++;
8791 cmp = name;
8792 while (*in != 0 && *in == *cmp) {
8793 ++in;
8794 ++cmp;
8795 }
William M. Brack76e95df2003-10-18 16:20:14 +00008796 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008797 /* success */
8798 ctxt->input->cur = in;
8799 return((const xmlChar*) 1);
8800 }
8801 }
8802 /*
8803 * all strings coms from the dictionary, equality can be done directly
8804 */
8805 ret = xmlParseQName (ctxt, &prefix2);
8806 if ((ret == name) && (prefix == prefix2))
8807 return((const xmlChar*) 1);
8808 return ret;
8809}
8810
8811/**
8812 * xmlParseAttValueInternal:
8813 * @ctxt: an XML parser context
8814 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008815 * @alloc: whether the attribute was reallocated as a new string
8816 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008817 *
8818 * parse a value for an attribute.
8819 * NOTE: if no normalization is needed, the routine will return pointers
8820 * directly from the data buffer.
8821 *
8822 * 3.3.3 Attribute-Value Normalization:
8823 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008824 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008825 * - a character reference is processed by appending the referenced
8826 * character to the attribute value
8827 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008828 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008829 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8830 * appending #x20 to the normalized value, except that only a single
8831 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008832 * parsed entity or the literal entity value of an internal parsed entity
8833 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 * If the declared value is not CDATA, then the XML processor must further
8835 * process the normalized attribute value by discarding any leading and
8836 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008837 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008838 * All attributes for which no declaration has been read should be treated
8839 * by a non-validating parser as if declared CDATA.
8840 *
8841 * Returns the AttValue parsed or NULL. The value has to be freed by the
8842 * caller if it was copied, this can be detected by val[*len] == 0.
8843 */
8844
8845static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008846xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8847 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008848{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008850 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008852 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008853
8854 GROW;
8855 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008856 line = ctxt->input->line;
8857 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008858 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008859 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008860 return (NULL);
8861 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008862 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008863
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008864 /*
8865 * try to handle in this routine the most common case where no
8866 * allocation of a new string is required and where content is
8867 * pure ASCII.
8868 */
8869 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008870 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008871 end = ctxt->input->end;
8872 start = in;
8873 if (in >= end) {
8874 const xmlChar *oldbase = ctxt->input->base;
8875 GROW;
8876 if (oldbase != ctxt->input->base) {
8877 long delta = ctxt->input->base - oldbase;
8878 start = start + delta;
8879 in = in + delta;
8880 }
8881 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008882 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008883 if (normalize) {
8884 /*
8885 * Skip any leading spaces
8886 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008887 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008888 ((*in == 0x20) || (*in == 0x9) ||
8889 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008890 if (*in == 0xA) {
8891 line++; col = 1;
8892 } else {
8893 col++;
8894 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008895 in++;
8896 start = in;
8897 if (in >= end) {
8898 const xmlChar *oldbase = ctxt->input->base;
8899 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008900 if (ctxt->instate == XML_PARSER_EOF)
8901 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008902 if (oldbase != ctxt->input->base) {
8903 long delta = ctxt->input->base - oldbase;
8904 start = start + delta;
8905 in = in + delta;
8906 }
8907 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008908 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8909 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8910 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008911 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008912 return(NULL);
8913 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008914 }
8915 }
8916 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8917 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008918 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008919 if ((*in++ == 0x20) && (*in == 0x20)) break;
8920 if (in >= end) {
8921 const xmlChar *oldbase = ctxt->input->base;
8922 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008923 if (ctxt->instate == XML_PARSER_EOF)
8924 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008925 if (oldbase != ctxt->input->base) {
8926 long delta = ctxt->input->base - oldbase;
8927 start = start + delta;
8928 in = in + delta;
8929 }
8930 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008931 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8932 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8933 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008934 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008935 return(NULL);
8936 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008937 }
8938 }
8939 last = in;
8940 /*
8941 * skip the trailing blanks
8942 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008943 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008944 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008945 ((*in == 0x20) || (*in == 0x9) ||
8946 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008947 if (*in == 0xA) {
8948 line++, col = 1;
8949 } else {
8950 col++;
8951 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008952 in++;
8953 if (in >= end) {
8954 const xmlChar *oldbase = ctxt->input->base;
8955 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008956 if (ctxt->instate == XML_PARSER_EOF)
8957 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008958 if (oldbase != ctxt->input->base) {
8959 long delta = ctxt->input->base - oldbase;
8960 start = start + delta;
8961 in = in + delta;
8962 last = last + delta;
8963 }
8964 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008965 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8966 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8967 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008968 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008969 return(NULL);
8970 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008971 }
8972 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008973 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8974 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8975 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008976 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008977 return(NULL);
8978 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008979 if (*in != limit) goto need_complex;
8980 } else {
8981 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8982 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8983 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008984 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008985 if (in >= end) {
8986 const xmlChar *oldbase = ctxt->input->base;
8987 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008988 if (ctxt->instate == XML_PARSER_EOF)
8989 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008990 if (oldbase != ctxt->input->base) {
8991 long delta = ctxt->input->base - oldbase;
8992 start = start + delta;
8993 in = in + delta;
8994 }
8995 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008996 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8997 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8998 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008999 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009000 return(NULL);
9001 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009002 }
9003 }
9004 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009005 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9006 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9007 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009008 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009009 return(NULL);
9010 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009011 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009012 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009013 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009014 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009015 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009016 *len = last - start;
9017 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009018 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009019 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009020 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009021 }
9022 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009023 ctxt->input->line = line;
9024 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009025 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009026 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009027need_complex:
9028 if (alloc) *alloc = 1;
9029 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009030}
9031
9032/**
9033 * xmlParseAttribute2:
9034 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009035 * @pref: the element prefix
9036 * @elem: the element name
9037 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009038 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009039 * @len: an int * to save the length of the attribute
9040 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009041 *
9042 * parse an attribute in the new SAX2 framework.
9043 *
9044 * Returns the attribute name, and the value in *value, .
9045 */
9046
9047static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009048xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009049 const xmlChar * pref, const xmlChar * elem,
9050 const xmlChar ** prefix, xmlChar ** value,
9051 int *len, int *alloc)
9052{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009053 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009054 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009055 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009056
9057 *value = NULL;
9058 GROW;
9059 name = xmlParseQName(ctxt, prefix);
9060 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009061 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9062 "error parsing attribute name\n");
9063 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009064 }
9065
9066 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009067 * get the type if needed
9068 */
9069 if (ctxt->attsSpecial != NULL) {
9070 int type;
9071
9072 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009073 pref, elem, *prefix, name);
9074 if (type != 0)
9075 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009076 }
9077
9078 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009079 * read the value
9080 */
9081 SKIP_BLANKS;
9082 if (RAW == '=') {
9083 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009084 SKIP_BLANKS;
9085 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9086 if (normalize) {
9087 /*
9088 * Sometimes a second normalisation pass for spaces is needed
9089 * but that only happens if charrefs or entities refernces
9090 * have been used in the attribute value, i.e. the attribute
9091 * value have been extracted in an allocated string already.
9092 */
9093 if (*alloc) {
9094 const xmlChar *val2;
9095
9096 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009097 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009098 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009099 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009100 }
9101 }
9102 }
9103 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009104 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009105 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009106 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009107 name);
9108 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009109 }
9110
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009111 if (*prefix == ctxt->str_xml) {
9112 /*
9113 * Check that xml:lang conforms to the specification
9114 * No more registered as an error, just generate a warning now
9115 * since this was deprecated in XML second edition
9116 */
9117 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9118 internal_val = xmlStrndup(val, *len);
9119 if (!xmlCheckLanguageID(internal_val)) {
9120 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9121 "Malformed value for xml:lang : %s\n",
9122 internal_val, NULL);
9123 }
9124 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009125
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009126 /*
9127 * Check that xml:space conforms to the specification
9128 */
9129 if (xmlStrEqual(name, BAD_CAST "space")) {
9130 internal_val = xmlStrndup(val, *len);
9131 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9132 *(ctxt->space) = 0;
9133 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9134 *(ctxt->space) = 1;
9135 else {
9136 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9137 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9138 internal_val, NULL);
9139 }
9140 }
9141 if (internal_val) {
9142 xmlFree(internal_val);
9143 }
9144 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009145
9146 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009147 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009148}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009149/**
9150 * xmlParseStartTag2:
9151 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009152 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009153 * parse a start of tag either for rule element or
9154 * EmptyElement. In both case we don't parse the tag closing chars.
9155 * This routine is called when running SAX2 parsing
9156 *
9157 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9158 *
9159 * [ WFC: Unique Att Spec ]
9160 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009161 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009162 *
9163 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9164 *
9165 * [ WFC: Unique Att Spec ]
9166 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009167 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009168 *
9169 * With namespace:
9170 *
9171 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9172 *
9173 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9174 *
9175 * Returns the element name parsed
9176 */
9177
9178static const xmlChar *
9179xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009180 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009181 const xmlChar *localname;
9182 const xmlChar *prefix;
9183 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009184 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009185 const xmlChar *nsname;
9186 xmlChar *attvalue;
9187 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009188 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009189 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009190 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009191 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009192 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009193
9194 if (RAW != '<') return(NULL);
9195 NEXT1;
9196
9197 /*
9198 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9199 * point since the attribute values may be stored as pointers to
9200 * the buffer and calling SHRINK would destroy them !
9201 * The Shrinking is only possible once the full set of attribute
9202 * callbacks have been done.
9203 */
9204 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009205 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009206 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009207 nbatts = 0;
9208 nratts = 0;
9209 nbdef = 0;
9210 nbNs = 0;
9211 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009212 /* Forget any namespaces added during an earlier parse of this element. */
9213 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009214
9215 localname = xmlParseQName(ctxt, &prefix);
9216 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009217 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9218 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009219 return(NULL);
9220 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009221 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009222
9223 /*
9224 * Now parse the attributes, it ends up with the ending
9225 *
9226 * (S Attribute)* S?
9227 */
9228 SKIP_BLANKS;
9229 GROW;
9230
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009231 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009232 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009233 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009234 const xmlChar *q = CUR_PTR;
9235 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009236 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009237
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009238 attname = xmlParseAttribute2(ctxt, prefix, localname,
9239 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009240 if ((attname == NULL) || (attvalue == NULL))
9241 goto next_attr;
9242 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009243
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009244 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9245 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9246 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009247
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009248 if (URL == NULL) {
9249 xmlErrMemory(ctxt, "dictionary allocation failure");
9250 if ((attvalue != NULL) && (alloc != 0))
9251 xmlFree(attvalue);
9252 return(NULL);
9253 }
9254 if (*URL != 0) {
9255 uri = xmlParseURI((const char *) URL);
9256 if (uri == NULL) {
9257 xmlNsErr(ctxt, XML_WAR_NS_URI,
9258 "xmlns: '%s' is not a valid URI\n",
9259 URL, NULL, NULL);
9260 } else {
9261 if (uri->scheme == NULL) {
9262 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9263 "xmlns: URI %s is not absolute\n",
9264 URL, NULL, NULL);
9265 }
9266 xmlFreeURI(uri);
9267 }
Daniel Veillard37334572008-07-31 08:20:02 +00009268 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009269 if (attname != ctxt->str_xml) {
9270 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9271 "xml namespace URI cannot be the default namespace\n",
9272 NULL, NULL, NULL);
9273 }
9274 goto next_attr;
9275 }
9276 if ((len == 29) &&
9277 (xmlStrEqual(URL,
9278 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9279 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9280 "reuse of the xmlns namespace name is forbidden\n",
9281 NULL, NULL, NULL);
9282 goto next_attr;
9283 }
9284 }
9285 /*
9286 * check that it's not a defined namespace
9287 */
9288 for (j = 1;j <= nbNs;j++)
9289 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9290 break;
9291 if (j <= nbNs)
9292 xmlErrAttributeDup(ctxt, NULL, attname);
9293 else
9294 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009295
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009296 } else if (aprefix == ctxt->str_xmlns) {
9297 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9298 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009299
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009300 if (attname == ctxt->str_xml) {
9301 if (URL != ctxt->str_xml_ns) {
9302 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9303 "xml namespace prefix mapped to wrong URI\n",
9304 NULL, NULL, NULL);
9305 }
9306 /*
9307 * Do not keep a namespace definition node
9308 */
9309 goto next_attr;
9310 }
9311 if (URL == ctxt->str_xml_ns) {
9312 if (attname != ctxt->str_xml) {
9313 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9314 "xml namespace URI mapped to wrong prefix\n",
9315 NULL, NULL, NULL);
9316 }
9317 goto next_attr;
9318 }
9319 if (attname == ctxt->str_xmlns) {
9320 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9321 "redefinition of the xmlns prefix is forbidden\n",
9322 NULL, NULL, NULL);
9323 goto next_attr;
9324 }
9325 if ((len == 29) &&
9326 (xmlStrEqual(URL,
9327 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9328 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9329 "reuse of the xmlns namespace name is forbidden\n",
9330 NULL, NULL, NULL);
9331 goto next_attr;
9332 }
9333 if ((URL == NULL) || (URL[0] == 0)) {
9334 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9335 "xmlns:%s: Empty XML namespace is not allowed\n",
9336 attname, NULL, NULL);
9337 goto next_attr;
9338 } else {
9339 uri = xmlParseURI((const char *) URL);
9340 if (uri == NULL) {
9341 xmlNsErr(ctxt, XML_WAR_NS_URI,
9342 "xmlns:%s: '%s' is not a valid URI\n",
9343 attname, URL, NULL);
9344 } else {
9345 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9346 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9347 "xmlns:%s: URI %s is not absolute\n",
9348 attname, URL, NULL);
9349 }
9350 xmlFreeURI(uri);
9351 }
9352 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009353
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009354 /*
9355 * check that it's not a defined namespace
9356 */
9357 for (j = 1;j <= nbNs;j++)
9358 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9359 break;
9360 if (j <= nbNs)
9361 xmlErrAttributeDup(ctxt, aprefix, attname);
9362 else
9363 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9364
9365 } else {
9366 /*
9367 * Add the pair to atts
9368 */
9369 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9370 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9371 goto next_attr;
9372 }
9373 maxatts = ctxt->maxatts;
9374 atts = ctxt->atts;
9375 }
9376 ctxt->attallocs[nratts++] = alloc;
9377 atts[nbatts++] = attname;
9378 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009379 /*
9380 * The namespace URI field is used temporarily to point at the
9381 * base of the current input buffer for non-alloced attributes.
9382 * When the input buffer is reallocated, all the pointers become
9383 * invalid, but they can be reconstructed later.
9384 */
9385 if (alloc)
9386 atts[nbatts++] = NULL;
9387 else
9388 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009389 atts[nbatts++] = attvalue;
9390 attvalue += len;
9391 atts[nbatts++] = attvalue;
9392 /*
9393 * tag if some deallocation is needed
9394 */
9395 if (alloc != 0) attval = 1;
9396 attvalue = NULL; /* moved into atts */
9397 }
9398
9399next_attr:
9400 if ((attvalue != NULL) && (alloc != 0)) {
9401 xmlFree(attvalue);
9402 attvalue = NULL;
9403 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009404
9405 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009406 if (ctxt->instate == XML_PARSER_EOF)
9407 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009408 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9409 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009410 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009411 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9412 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009413 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009414 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009415 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9416 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009417 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009418 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009419 break;
9420 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009421 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009422 }
9423
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009424 if (ctxt->input->id != inputid) {
9425 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9426 "Unexpected change of input\n");
9427 localname = NULL;
9428 goto done;
9429 }
9430
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009431 /* Reconstruct attribute value pointers. */
9432 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9433 if (atts[i+2] != NULL) {
9434 /*
9435 * Arithmetic on dangling pointers is technically undefined
9436 * behavior, but well...
9437 */
9438 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9439 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9440 atts[i+3] += offset; /* value */
9441 atts[i+4] += offset; /* valuend */
9442 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009443 }
9444
Daniel Veillard0fb18932003-09-07 09:14:37 +00009445 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009446 * The attributes defaulting
9447 */
9448 if (ctxt->attsDefault != NULL) {
9449 xmlDefAttrsPtr defaults;
9450
9451 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9452 if (defaults != NULL) {
9453 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009454 attname = defaults->values[5 * i];
9455 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009456
9457 /*
9458 * special work for namespaces defaulted defs
9459 */
9460 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9461 /*
9462 * check that it's not a defined namespace
9463 */
9464 for (j = 1;j <= nbNs;j++)
9465 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9466 break;
9467 if (j <= nbNs) continue;
9468
9469 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009470 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009471 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009472 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009473 nbNs++;
9474 }
9475 } else if (aprefix == ctxt->str_xmlns) {
9476 /*
9477 * check that it's not a defined namespace
9478 */
9479 for (j = 1;j <= nbNs;j++)
9480 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9481 break;
9482 if (j <= nbNs) continue;
9483
9484 nsname = xmlGetNamespace(ctxt, attname);
9485 if (nsname != defaults->values[2]) {
9486 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009487 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009488 nbNs++;
9489 }
9490 } else {
9491 /*
9492 * check that it's not a defined attribute
9493 */
9494 for (j = 0;j < nbatts;j+=5) {
9495 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9496 break;
9497 }
9498 if (j < nbatts) continue;
9499
9500 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9501 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009502 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009503 }
9504 maxatts = ctxt->maxatts;
9505 atts = ctxt->atts;
9506 }
9507 atts[nbatts++] = attname;
9508 atts[nbatts++] = aprefix;
9509 if (aprefix == NULL)
9510 atts[nbatts++] = NULL;
9511 else
9512 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009513 atts[nbatts++] = defaults->values[5 * i + 2];
9514 atts[nbatts++] = defaults->values[5 * i + 3];
9515 if ((ctxt->standalone == 1) &&
9516 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009517 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009518 "standalone: attribute %s on %s defaulted from external subset\n",
9519 attname, localname);
9520 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009521 nbdef++;
9522 }
9523 }
9524 }
9525 }
9526
Daniel Veillarde70c8772003-11-25 07:21:18 +00009527 /*
9528 * The attributes checkings
9529 */
9530 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009531 /*
9532 * The default namespace does not apply to attribute names.
9533 */
9534 if (atts[i + 1] != NULL) {
9535 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9536 if (nsname == NULL) {
9537 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9538 "Namespace prefix %s for %s on %s is not defined\n",
9539 atts[i + 1], atts[i], localname);
9540 }
9541 atts[i + 2] = nsname;
9542 } else
9543 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009544 /*
9545 * [ WFC: Unique Att Spec ]
9546 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009547 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009548 * As extended by the Namespace in XML REC.
9549 */
9550 for (j = 0; j < i;j += 5) {
9551 if (atts[i] == atts[j]) {
9552 if (atts[i+1] == atts[j+1]) {
9553 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9554 break;
9555 }
9556 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9557 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9558 "Namespaced Attribute %s in '%s' redefined\n",
9559 atts[i], nsname, NULL);
9560 break;
9561 }
9562 }
9563 }
9564 }
9565
Daniel Veillarde57ec792003-09-10 10:50:59 +00009566 nsname = xmlGetNamespace(ctxt, prefix);
9567 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009568 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9569 "Namespace prefix %s on %s is not defined\n",
9570 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009571 }
9572 *pref = prefix;
9573 *URI = nsname;
9574
9575 /*
9576 * SAX: Start of Element !
9577 */
9578 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9579 (!ctxt->disableSAX)) {
9580 if (nbNs > 0)
9581 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9582 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9583 nbatts / 5, nbdef, atts);
9584 else
9585 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9586 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9587 }
9588
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009589done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009590 /*
9591 * Free up attribute allocated strings if needed
9592 */
9593 if (attval != 0) {
9594 for (i = 3,j = 0; j < nratts;i += 5,j++)
9595 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9596 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009597 }
9598
9599 return(localname);
9600}
9601
9602/**
9603 * xmlParseEndTag2:
9604 * @ctxt: an XML parser context
9605 * @line: line of the start tag
9606 * @nsNr: number of namespaces on the start tag
9607 *
9608 * parse an end of tag
9609 *
9610 * [42] ETag ::= '</' Name S? '>'
9611 *
9612 * With namespace
9613 *
9614 * [NS 9] ETag ::= '</' QName S? '>'
9615 */
9616
9617static void
9618xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009619 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009620 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009621 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009622
9623 GROW;
9624 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009625 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009626 return;
9627 }
9628 SKIP(2);
9629
David Kilzerdb07dd62016-02-12 09:58:29 -08009630 curLength = ctxt->input->end - ctxt->input->cur;
9631 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9632 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9633 if ((curLength >= (size_t)(tlen + 1)) &&
9634 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009635 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009636 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009637 goto done;
9638 }
9639 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009640 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009641 name = (xmlChar*)1;
9642 } else {
9643 if (prefix == NULL)
9644 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9645 else
9646 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9647 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009648
9649 /*
9650 * We should definitely be at the ending "S? '>'" part
9651 */
9652 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009653 if (ctxt->instate == XML_PARSER_EOF)
9654 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009655 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009656 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009657 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009658 } else
9659 NEXT1;
9660
9661 /*
9662 * [ WFC: Element Type Match ]
9663 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009664 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009665 *
9666 */
9667 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009668 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009669 if ((line == 0) && (ctxt->node != NULL))
9670 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009671 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009672 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009673 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009674 }
9675
9676 /*
9677 * SAX: End of Tag
9678 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009679done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009680 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9681 (!ctxt->disableSAX))
9682 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9683
Daniel Veillard0fb18932003-09-07 09:14:37 +00009684 spacePop(ctxt);
9685 if (nsNr != 0)
9686 nsPop(ctxt, nsNr);
9687 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009688}
9689
9690/**
Owen Taylor3473f882001-02-23 17:55:21 +00009691 * xmlParseCDSect:
9692 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009693 *
Owen Taylor3473f882001-02-23 17:55:21 +00009694 * Parse escaped pure raw content.
9695 *
9696 * [18] CDSect ::= CDStart CData CDEnd
9697 *
9698 * [19] CDStart ::= '<![CDATA['
9699 *
9700 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9701 *
9702 * [21] CDEnd ::= ']]>'
9703 */
9704void
9705xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9706 xmlChar *buf = NULL;
9707 int len = 0;
9708 int size = XML_PARSER_BUFFER_SIZE;
9709 int r, rl;
9710 int s, sl;
9711 int cur, l;
9712 int count = 0;
9713
Daniel Veillard8f597c32003-10-06 08:19:27 +00009714 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009715 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009716 SKIP(9);
9717 } else
9718 return;
9719
9720 ctxt->instate = XML_PARSER_CDATA_SECTION;
9721 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009722 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009723 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009724 ctxt->instate = XML_PARSER_CONTENT;
9725 return;
9726 }
9727 NEXTL(rl);
9728 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009729 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009730 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009731 ctxt->instate = XML_PARSER_CONTENT;
9732 return;
9733 }
9734 NEXTL(sl);
9735 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009736 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009737 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009738 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009739 return;
9740 }
William M. Brack871611b2003-10-18 04:53:14 +00009741 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009742 ((r != ']') || (s != ']') || (cur != '>'))) {
9743 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009744 xmlChar *tmp;
9745
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009746 if ((size > XML_MAX_TEXT_LENGTH) &&
9747 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9748 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9749 "CData section too big found", NULL);
9750 xmlFree (buf);
9751 return;
9752 }
9753 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009754 if (tmp == NULL) {
9755 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009756 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009757 return;
9758 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009759 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009760 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009761 }
9762 COPY_BUF(rl,buf,len,r);
9763 r = s;
9764 rl = sl;
9765 s = cur;
9766 sl = l;
9767 count++;
9768 if (count > 50) {
9769 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009770 if (ctxt->instate == XML_PARSER_EOF) {
9771 xmlFree(buf);
9772 return;
9773 }
Owen Taylor3473f882001-02-23 17:55:21 +00009774 count = 0;
9775 }
9776 NEXTL(l);
9777 cur = CUR_CHAR(l);
9778 }
9779 buf[len] = 0;
9780 ctxt->instate = XML_PARSER_CONTENT;
9781 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009782 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009783 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009784 xmlFree(buf);
9785 return;
9786 }
9787 NEXTL(l);
9788
9789 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009790 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009791 */
9792 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9793 if (ctxt->sax->cdataBlock != NULL)
9794 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009795 else if (ctxt->sax->characters != NULL)
9796 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009797 }
9798 xmlFree(buf);
9799}
9800
9801/**
9802 * xmlParseContent:
9803 * @ctxt: an XML parser context
9804 *
9805 * Parse a content:
9806 *
9807 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9808 */
9809
9810void
9811xmlParseContent(xmlParserCtxtPtr ctxt) {
9812 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009813 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009814 ((RAW != '<') || (NXT(1) != '/')) &&
9815 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009816 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009817 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009818 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009819
9820 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009821 * First case : a Processing Instruction.
9822 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009823 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009824 xmlParsePI(ctxt);
9825 }
9826
9827 /*
9828 * Second case : a CDSection
9829 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009830 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009831 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009832 xmlParseCDSect(ctxt);
9833 }
9834
9835 /*
9836 * Third case : a comment
9837 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009838 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009839 (NXT(2) == '-') && (NXT(3) == '-')) {
9840 xmlParseComment(ctxt);
9841 ctxt->instate = XML_PARSER_CONTENT;
9842 }
9843
9844 /*
9845 * Fourth case : a sub-element.
9846 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009847 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009848 xmlParseElement(ctxt);
9849 }
9850
9851 /*
9852 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009853 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009854 */
9855
Daniel Veillard21a0f912001-02-25 19:54:14 +00009856 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009857 xmlParseReference(ctxt);
9858 }
9859
9860 /*
9861 * Last case, text. Note that References are handled directly.
9862 */
9863 else {
9864 xmlParseCharData(ctxt, 0);
9865 }
9866
9867 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009868 SHRINK;
9869
Daniel Veillardfdc91562002-07-01 21:52:03 +00009870 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009871 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9872 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009873 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009874 break;
9875 }
9876 }
9877}
9878
9879/**
9880 * xmlParseElement:
9881 * @ctxt: an XML parser context
9882 *
9883 * parse an XML element, this is highly recursive
9884 *
9885 * [39] element ::= EmptyElemTag | STag content ETag
9886 *
9887 * [ WFC: Element Type Match ]
9888 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009889 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009890 *
Owen Taylor3473f882001-02-23 17:55:21 +00009891 */
9892
9893void
9894xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009895 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009896 const xmlChar *prefix = NULL;
9897 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009898 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009899 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009900 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009901 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009902
Daniel Veillard8915c152008-08-26 13:05:34 +00009903 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9904 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9905 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9906 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9907 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08009908 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009909 return;
9910 }
9911
Owen Taylor3473f882001-02-23 17:55:21 +00009912 /* Capture start position */
9913 if (ctxt->record_info) {
9914 node_info.begin_pos = ctxt->input->consumed +
9915 (CUR_PTR - ctxt->input->base);
9916 node_info.begin_line = ctxt->input->line;
9917 }
9918
9919 if (ctxt->spaceNr == 0)
9920 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009921 else if (*ctxt->space == -2)
9922 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009923 else
9924 spacePush(ctxt, *ctxt->space);
9925
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009926 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009927#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009928 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009929#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009930 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009931#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009932 else
9933 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009934#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009935 if (ctxt->instate == XML_PARSER_EOF)
9936 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009937 if (name == NULL) {
9938 spacePop(ctxt);
9939 return;
9940 }
9941 namePush(ctxt, name);
9942 ret = ctxt->node;
9943
Daniel Veillard4432df22003-09-28 18:58:27 +00009944#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009945 /*
9946 * [ VC: Root Element Type ]
9947 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009948 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009949 */
9950 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9951 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9952 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009953#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009954
9955 /*
9956 * Check for an Empty Element.
9957 */
9958 if ((RAW == '/') && (NXT(1) == '>')) {
9959 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009960 if (ctxt->sax2) {
9961 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9962 (!ctxt->disableSAX))
9963 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009964#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009965 } else {
9966 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9967 (!ctxt->disableSAX))
9968 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009969#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009970 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009971 namePop(ctxt);
9972 spacePop(ctxt);
9973 if (nsNr != ctxt->nsNr)
9974 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009975 if ( ret != NULL && ctxt->record_info ) {
9976 node_info.end_pos = ctxt->input->consumed +
9977 (CUR_PTR - ctxt->input->base);
9978 node_info.end_line = ctxt->input->line;
9979 node_info.node = ret;
9980 xmlParserAddNodeInfo(ctxt, &node_info);
9981 }
9982 return;
9983 }
9984 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009985 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009986 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009987 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9988 "Couldn't find end of Start Tag %s line %d\n",
9989 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009990
9991 /*
9992 * end of parsing of this node.
9993 */
9994 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009995 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009996 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009997 if (nsNr != ctxt->nsNr)
9998 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009999
10000 /*
10001 * Capture end position and add node
10002 */
10003 if ( ret != NULL && ctxt->record_info ) {
10004 node_info.end_pos = ctxt->input->consumed +
10005 (CUR_PTR - ctxt->input->base);
10006 node_info.end_line = ctxt->input->line;
10007 node_info.node = ret;
10008 xmlParserAddNodeInfo(ctxt, &node_info);
10009 }
10010 return;
10011 }
10012
10013 /*
10014 * Parse the content of the element:
10015 */
10016 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010017 if (ctxt->instate == XML_PARSER_EOF)
10018 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010019 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010020 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010021 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010022 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010023
10024 /*
10025 * end of parsing of this node.
10026 */
10027 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010028 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010029 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010030 if (nsNr != ctxt->nsNr)
10031 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010032 return;
10033 }
10034
10035 /*
10036 * parse the end of tag: '</' should be here.
10037 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010038 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010039 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010040 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010041 }
10042#ifdef LIBXML_SAX1_ENABLED
10043 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010044 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010045#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010046
10047 /*
10048 * Capture end position and add node
10049 */
10050 if ( ret != NULL && ctxt->record_info ) {
10051 node_info.end_pos = ctxt->input->consumed +
10052 (CUR_PTR - ctxt->input->base);
10053 node_info.end_line = ctxt->input->line;
10054 node_info.node = ret;
10055 xmlParserAddNodeInfo(ctxt, &node_info);
10056 }
10057}
10058
10059/**
10060 * xmlParseVersionNum:
10061 * @ctxt: an XML parser context
10062 *
10063 * parse the XML version value.
10064 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010065 * [26] VersionNum ::= '1.' [0-9]+
10066 *
10067 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010068 *
10069 * Returns the string giving the XML version number, or NULL
10070 */
10071xmlChar *
10072xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10073 xmlChar *buf = NULL;
10074 int len = 0;
10075 int size = 10;
10076 xmlChar cur;
10077
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010078 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010079 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010080 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010081 return(NULL);
10082 }
10083 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010084 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010085 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010086 return(NULL);
10087 }
10088 buf[len++] = cur;
10089 NEXT;
10090 cur=CUR;
10091 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010092 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010093 return(NULL);
10094 }
10095 buf[len++] = cur;
10096 NEXT;
10097 cur=CUR;
10098 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010099 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010100 xmlChar *tmp;
10101
Owen Taylor3473f882001-02-23 17:55:21 +000010102 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010103 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10104 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010105 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010106 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010107 return(NULL);
10108 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010109 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010110 }
10111 buf[len++] = cur;
10112 NEXT;
10113 cur=CUR;
10114 }
10115 buf[len] = 0;
10116 return(buf);
10117}
10118
10119/**
10120 * xmlParseVersionInfo:
10121 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010122 *
Owen Taylor3473f882001-02-23 17:55:21 +000010123 * parse the XML version.
10124 *
10125 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010126 *
Owen Taylor3473f882001-02-23 17:55:21 +000010127 * [25] Eq ::= S? '=' S?
10128 *
10129 * Returns the version string, e.g. "1.0"
10130 */
10131
10132xmlChar *
10133xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10134 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010135
Daniel Veillarda07050d2003-10-19 14:46:32 +000010136 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010137 SKIP(7);
10138 SKIP_BLANKS;
10139 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010140 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010141 return(NULL);
10142 }
10143 NEXT;
10144 SKIP_BLANKS;
10145 if (RAW == '"') {
10146 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010147 version = xmlParseVersionNum(ctxt);
10148 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010149 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010150 } else
10151 NEXT;
10152 } else if (RAW == '\''){
10153 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010154 version = xmlParseVersionNum(ctxt);
10155 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010156 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010157 } else
10158 NEXT;
10159 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010160 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010161 }
10162 }
10163 return(version);
10164}
10165
10166/**
10167 * xmlParseEncName:
10168 * @ctxt: an XML parser context
10169 *
10170 * parse the XML encoding name
10171 *
10172 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10173 *
10174 * Returns the encoding name value or NULL
10175 */
10176xmlChar *
10177xmlParseEncName(xmlParserCtxtPtr ctxt) {
10178 xmlChar *buf = NULL;
10179 int len = 0;
10180 int size = 10;
10181 xmlChar cur;
10182
10183 cur = CUR;
10184 if (((cur >= 'a') && (cur <= 'z')) ||
10185 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010186 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010187 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010188 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010189 return(NULL);
10190 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010191
Owen Taylor3473f882001-02-23 17:55:21 +000010192 buf[len++] = cur;
10193 NEXT;
10194 cur = CUR;
10195 while (((cur >= 'a') && (cur <= 'z')) ||
10196 ((cur >= 'A') && (cur <= 'Z')) ||
10197 ((cur >= '0') && (cur <= '9')) ||
10198 (cur == '.') || (cur == '_') ||
10199 (cur == '-')) {
10200 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010201 xmlChar *tmp;
10202
Owen Taylor3473f882001-02-23 17:55:21 +000010203 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010204 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10205 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010206 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010207 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010208 return(NULL);
10209 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010210 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010211 }
10212 buf[len++] = cur;
10213 NEXT;
10214 cur = CUR;
10215 if (cur == 0) {
10216 SHRINK;
10217 GROW;
10218 cur = CUR;
10219 }
10220 }
10221 buf[len] = 0;
10222 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010223 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010224 }
10225 return(buf);
10226}
10227
10228/**
10229 * xmlParseEncodingDecl:
10230 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010231 *
Owen Taylor3473f882001-02-23 17:55:21 +000010232 * parse the XML encoding declaration
10233 *
10234 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10235 *
10236 * this setups the conversion filters.
10237 *
10238 * Returns the encoding value or NULL
10239 */
10240
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010241const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010242xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10243 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010244
10245 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010246 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010247 SKIP(8);
10248 SKIP_BLANKS;
10249 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010250 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010251 return(NULL);
10252 }
10253 NEXT;
10254 SKIP_BLANKS;
10255 if (RAW == '"') {
10256 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010257 encoding = xmlParseEncName(ctxt);
10258 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010259 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010260 xmlFree((xmlChar *) encoding);
10261 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010262 } else
10263 NEXT;
10264 } else if (RAW == '\''){
10265 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010266 encoding = xmlParseEncName(ctxt);
10267 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010268 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010269 xmlFree((xmlChar *) encoding);
10270 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010271 } else
10272 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010273 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010274 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010275 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010276
10277 /*
10278 * Non standard parsing, allowing the user to ignore encoding
10279 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010280 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10281 xmlFree((xmlChar *) encoding);
10282 return(NULL);
10283 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010284
Daniel Veillard6b621b82003-08-11 15:03:34 +000010285 /*
10286 * UTF-16 encoding stwich has already taken place at this stage,
10287 * more over the little-endian/big-endian selection is already done
10288 */
10289 if ((encoding != NULL) &&
10290 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10291 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010292 /*
10293 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010294 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010295 * document is apparently UTF-8 compatible, then raise an
10296 * encoding mismatch fatal error
10297 */
10298 if ((ctxt->encoding == NULL) &&
10299 (ctxt->input->buf != NULL) &&
10300 (ctxt->input->buf->encoder == NULL)) {
10301 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10302 "Document labelled UTF-16 but has UTF-8 content\n");
10303 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010304 if (ctxt->encoding != NULL)
10305 xmlFree((xmlChar *) ctxt->encoding);
10306 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010307 }
10308 /*
10309 * UTF-8 encoding is handled natively
10310 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010311 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010312 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10313 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010314 if (ctxt->encoding != NULL)
10315 xmlFree((xmlChar *) ctxt->encoding);
10316 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010317 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010318 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010319 xmlCharEncodingHandlerPtr handler;
10320
10321 if (ctxt->input->encoding != NULL)
10322 xmlFree((xmlChar *) ctxt->input->encoding);
10323 ctxt->input->encoding = encoding;
10324
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010325 handler = xmlFindCharEncodingHandler((const char *) encoding);
10326 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010327 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10328 /* failed to convert */
10329 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10330 return(NULL);
10331 }
Owen Taylor3473f882001-02-23 17:55:21 +000010332 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010333 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010334 "Unsupported encoding %s\n", encoding);
10335 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010336 }
10337 }
10338 }
10339 return(encoding);
10340}
10341
10342/**
10343 * xmlParseSDDecl:
10344 * @ctxt: an XML parser context
10345 *
10346 * parse the XML standalone declaration
10347 *
10348 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010349 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010350 *
10351 * [ VC: Standalone Document Declaration ]
10352 * TODO The standalone document declaration must have the value "no"
10353 * if any external markup declarations contain declarations of:
10354 * - attributes with default values, if elements to which these
10355 * attributes apply appear in the document without specifications
10356 * of values for these attributes, or
10357 * - entities (other than amp, lt, gt, apos, quot), if references
10358 * to those entities appear in the document, or
10359 * - attributes with values subject to normalization, where the
10360 * attribute appears in the document with a value which will change
10361 * as a result of normalization, or
10362 * - element types with element content, if white space occurs directly
10363 * within any instance of those types.
10364 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010365 * Returns:
10366 * 1 if standalone="yes"
10367 * 0 if standalone="no"
10368 * -2 if standalone attribute is missing or invalid
10369 * (A standalone value of -2 means that the XML declaration was found,
10370 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010371 */
10372
10373int
10374xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010375 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010376
10377 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010378 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010379 SKIP(10);
10380 SKIP_BLANKS;
10381 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010382 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010383 return(standalone);
10384 }
10385 NEXT;
10386 SKIP_BLANKS;
10387 if (RAW == '\''){
10388 NEXT;
10389 if ((RAW == 'n') && (NXT(1) == 'o')) {
10390 standalone = 0;
10391 SKIP(2);
10392 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10393 (NXT(2) == 's')) {
10394 standalone = 1;
10395 SKIP(3);
10396 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010397 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010398 }
10399 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010400 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010401 } else
10402 NEXT;
10403 } else if (RAW == '"'){
10404 NEXT;
10405 if ((RAW == 'n') && (NXT(1) == 'o')) {
10406 standalone = 0;
10407 SKIP(2);
10408 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10409 (NXT(2) == 's')) {
10410 standalone = 1;
10411 SKIP(3);
10412 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010413 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010414 }
10415 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010416 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010417 } else
10418 NEXT;
10419 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010420 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010421 }
10422 }
10423 return(standalone);
10424}
10425
10426/**
10427 * xmlParseXMLDecl:
10428 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010429 *
Owen Taylor3473f882001-02-23 17:55:21 +000010430 * parse an XML declaration header
10431 *
10432 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10433 */
10434
10435void
10436xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10437 xmlChar *version;
10438
10439 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010440 * This value for standalone indicates that the document has an
10441 * XML declaration but it does not have a standalone attribute.
10442 * It will be overwritten later if a standalone attribute is found.
10443 */
10444 ctxt->input->standalone = -2;
10445
10446 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010447 * We know that '<?xml' is here.
10448 */
10449 SKIP(5);
10450
William M. Brack76e95df2003-10-18 16:20:14 +000010451 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10453 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010454 }
10455 SKIP_BLANKS;
10456
10457 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010458 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010459 */
10460 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010461 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010462 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010463 } else {
10464 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10465 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010466 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010467 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010468 if (ctxt->options & XML_PARSE_OLD10) {
10469 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10470 "Unsupported version '%s'\n",
10471 version);
10472 } else {
10473 if ((version[0] == '1') && ((version[1] == '.'))) {
10474 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10475 "Unsupported version '%s'\n",
10476 version, NULL);
10477 } else {
10478 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10479 "Unsupported version '%s'\n",
10480 version);
10481 }
10482 }
Daniel Veillard19840942001-11-29 16:11:38 +000010483 }
10484 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010485 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010486 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010487 }
Owen Taylor3473f882001-02-23 17:55:21 +000010488
10489 /*
10490 * We may have the encoding declaration
10491 */
William M. Brack76e95df2003-10-18 16:20:14 +000010492 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010493 if ((RAW == '?') && (NXT(1) == '>')) {
10494 SKIP(2);
10495 return;
10496 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010497 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010498 }
10499 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010500 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10501 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010502 /*
10503 * The XML REC instructs us to stop parsing right here
10504 */
10505 return;
10506 }
10507
10508 /*
10509 * We may have the standalone status.
10510 */
William M. Brack76e95df2003-10-18 16:20:14 +000010511 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010512 if ((RAW == '?') && (NXT(1) == '>')) {
10513 SKIP(2);
10514 return;
10515 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010516 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010517 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010518
10519 /*
10520 * We can grow the input buffer freely at that point
10521 */
10522 GROW;
10523
Owen Taylor3473f882001-02-23 17:55:21 +000010524 SKIP_BLANKS;
10525 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10526
10527 SKIP_BLANKS;
10528 if ((RAW == '?') && (NXT(1) == '>')) {
10529 SKIP(2);
10530 } else if (RAW == '>') {
10531 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010532 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010533 NEXT;
10534 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010535 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010536 MOVETO_ENDTAG(CUR_PTR);
10537 NEXT;
10538 }
10539}
10540
10541/**
10542 * xmlParseMisc:
10543 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010544 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010545 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010546 *
10547 * [27] Misc ::= Comment | PI | S
10548 */
10549
10550void
10551xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010552 while ((ctxt->instate != XML_PARSER_EOF) &&
10553 (((RAW == '<') && (NXT(1) == '?')) ||
10554 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10555 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010556 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010557 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010558 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010559 NEXT;
10560 } else
10561 xmlParseComment(ctxt);
10562 }
10563}
10564
10565/**
10566 * xmlParseDocument:
10567 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010568 *
Owen Taylor3473f882001-02-23 17:55:21 +000010569 * parse an XML document (and build a tree if using the standard SAX
10570 * interface).
10571 *
10572 * [1] document ::= prolog element Misc*
10573 *
10574 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10575 *
10576 * Returns 0, -1 in case of error. the parser context is augmented
10577 * as a result of the parsing.
10578 */
10579
10580int
10581xmlParseDocument(xmlParserCtxtPtr ctxt) {
10582 xmlChar start[4];
10583 xmlCharEncoding enc;
10584
10585 xmlInitParser();
10586
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010587 if ((ctxt == NULL) || (ctxt->input == NULL))
10588 return(-1);
10589
Owen Taylor3473f882001-02-23 17:55:21 +000010590 GROW;
10591
10592 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010593 * SAX: detecting the level.
10594 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010595 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010596
10597 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010598 * SAX: beginning of the document processing.
10599 */
10600 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10601 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010602 if (ctxt->instate == XML_PARSER_EOF)
10603 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010604
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010605 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010606 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010607 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010608 * Get the 4 first bytes and decode the charset
10609 * if enc != XML_CHAR_ENCODING_NONE
10610 * plug some encoding conversion routines.
10611 */
10612 start[0] = RAW;
10613 start[1] = NXT(1);
10614 start[2] = NXT(2);
10615 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010616 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010617 if (enc != XML_CHAR_ENCODING_NONE) {
10618 xmlSwitchEncoding(ctxt, enc);
10619 }
Owen Taylor3473f882001-02-23 17:55:21 +000010620 }
10621
10622
10623 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010624 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010625 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010626 }
10627
10628 /*
10629 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010630 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010631 * than just the first line, unless the amount of data is really
10632 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010633 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010634 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10635 GROW;
10636 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010637 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010638
10639 /*
10640 * Note that we will switch encoding on the fly.
10641 */
10642 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010643 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10644 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010645 /*
10646 * The XML REC instructs us to stop parsing right here
10647 */
10648 return(-1);
10649 }
10650 ctxt->standalone = ctxt->input->standalone;
10651 SKIP_BLANKS;
10652 } else {
10653 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10654 }
10655 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10656 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010657 if (ctxt->instate == XML_PARSER_EOF)
10658 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010659 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10660 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10661 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10662 }
Owen Taylor3473f882001-02-23 17:55:21 +000010663
10664 /*
10665 * The Misc part of the Prolog
10666 */
10667 GROW;
10668 xmlParseMisc(ctxt);
10669
10670 /*
10671 * Then possibly doc type declaration(s) and more Misc
10672 * (doctypedecl Misc*)?
10673 */
10674 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010675 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010676
10677 ctxt->inSubset = 1;
10678 xmlParseDocTypeDecl(ctxt);
10679 if (RAW == '[') {
10680 ctxt->instate = XML_PARSER_DTD;
10681 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010682 if (ctxt->instate == XML_PARSER_EOF)
10683 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010684 }
10685
10686 /*
10687 * Create and update the external subset.
10688 */
10689 ctxt->inSubset = 2;
10690 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10691 (!ctxt->disableSAX))
10692 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10693 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010694 if (ctxt->instate == XML_PARSER_EOF)
10695 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010696 ctxt->inSubset = 0;
10697
Daniel Veillardac4118d2008-01-11 05:27:32 +000010698 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010699
10700 ctxt->instate = XML_PARSER_PROLOG;
10701 xmlParseMisc(ctxt);
10702 }
10703
10704 /*
10705 * Time to start parsing the tree itself
10706 */
10707 GROW;
10708 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010709 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10710 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010711 } else {
10712 ctxt->instate = XML_PARSER_CONTENT;
10713 xmlParseElement(ctxt);
10714 ctxt->instate = XML_PARSER_EPILOG;
10715
10716
10717 /*
10718 * The Misc part at the end
10719 */
10720 xmlParseMisc(ctxt);
10721
Daniel Veillard561b7f82002-03-20 21:55:57 +000010722 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010723 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010724 }
10725 ctxt->instate = XML_PARSER_EOF;
10726 }
10727
10728 /*
10729 * SAX: end of the document processing.
10730 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010731 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010732 ctxt->sax->endDocument(ctxt->userData);
10733
Daniel Veillard5997aca2002-03-18 18:36:20 +000010734 /*
10735 * Remove locally kept entity definitions if the tree was not built
10736 */
10737 if ((ctxt->myDoc != NULL) &&
10738 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10739 xmlFreeDoc(ctxt->myDoc);
10740 ctxt->myDoc = NULL;
10741 }
10742
Daniel Veillardae0765b2008-07-31 19:54:59 +000010743 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10744 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10745 if (ctxt->valid)
10746 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10747 if (ctxt->nsWellFormed)
10748 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10749 if (ctxt->options & XML_PARSE_OLD10)
10750 ctxt->myDoc->properties |= XML_DOC_OLD10;
10751 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010752 if (! ctxt->wellFormed) {
10753 ctxt->valid = 0;
10754 return(-1);
10755 }
Owen Taylor3473f882001-02-23 17:55:21 +000010756 return(0);
10757}
10758
10759/**
10760 * xmlParseExtParsedEnt:
10761 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010762 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010763 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010764 * An external general parsed entity is well-formed if it matches the
10765 * production labeled extParsedEnt.
10766 *
10767 * [78] extParsedEnt ::= TextDecl? content
10768 *
10769 * Returns 0, -1 in case of error. the parser context is augmented
10770 * as a result of the parsing.
10771 */
10772
10773int
10774xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10775 xmlChar start[4];
10776 xmlCharEncoding enc;
10777
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010778 if ((ctxt == NULL) || (ctxt->input == NULL))
10779 return(-1);
10780
Owen Taylor3473f882001-02-23 17:55:21 +000010781 xmlDefaultSAXHandlerInit();
10782
Daniel Veillard309f81d2003-09-23 09:02:53 +000010783 xmlDetectSAX2(ctxt);
10784
Owen Taylor3473f882001-02-23 17:55:21 +000010785 GROW;
10786
10787 /*
10788 * SAX: beginning of the document processing.
10789 */
10790 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10791 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10792
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010793 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010794 * Get the 4 first bytes and decode the charset
10795 * if enc != XML_CHAR_ENCODING_NONE
10796 * plug some encoding conversion routines.
10797 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010798 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10799 start[0] = RAW;
10800 start[1] = NXT(1);
10801 start[2] = NXT(2);
10802 start[3] = NXT(3);
10803 enc = xmlDetectCharEncoding(start, 4);
10804 if (enc != XML_CHAR_ENCODING_NONE) {
10805 xmlSwitchEncoding(ctxt, enc);
10806 }
Owen Taylor3473f882001-02-23 17:55:21 +000010807 }
10808
10809
10810 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010811 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010812 }
10813
10814 /*
10815 * Check for the XMLDecl in the Prolog.
10816 */
10817 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010818 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010819
10820 /*
10821 * Note that we will switch encoding on the fly.
10822 */
10823 xmlParseXMLDecl(ctxt);
10824 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10825 /*
10826 * The XML REC instructs us to stop parsing right here
10827 */
10828 return(-1);
10829 }
10830 SKIP_BLANKS;
10831 } else {
10832 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10833 }
10834 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10835 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010836 if (ctxt->instate == XML_PARSER_EOF)
10837 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010838
10839 /*
10840 * Doing validity checking on chunk doesn't make sense
10841 */
10842 ctxt->instate = XML_PARSER_CONTENT;
10843 ctxt->validate = 0;
10844 ctxt->loadsubset = 0;
10845 ctxt->depth = 0;
10846
10847 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010848 if (ctxt->instate == XML_PARSER_EOF)
10849 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010850
Owen Taylor3473f882001-02-23 17:55:21 +000010851 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010852 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010853 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010854 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010855 }
10856
10857 /*
10858 * SAX: end of the document processing.
10859 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010860 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010861 ctxt->sax->endDocument(ctxt->userData);
10862
10863 if (! ctxt->wellFormed) return(-1);
10864 return(0);
10865}
10866
Daniel Veillard73b013f2003-09-30 12:36:01 +000010867#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
 *                                                                      *
 *              Progressive parsing interfaces                          *
 *                                                                      *
 ************************************************************************/
10873
10874/**
10875 * xmlParseLookupSequence:
10876 * @ctxt: an XML parser context
10877 * @first: the first char to lookup
10878 * @next: the next char to lookup or zero
10879 * @third: the next char to lookup or zero
10880 *
10881 * Try to find if a sequence (first, next, third) or just (first next) or
10882 * (first) is available in the input stream.
10883 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10884 * to avoid rescanning sequences of bytes, it DOES change the state of the
10885 * parser, do not use liberally.
10886 *
10887 * Returns the index to the current parsing point if the full sequence
10888 * is available, -1 otherwise.
10889 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010890static int
Owen Taylor3473f882001-02-23 17:55:21 +000010891xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10892 xmlChar next, xmlChar third) {
10893 int base, len;
10894 xmlParserInputPtr in;
10895 const xmlChar *buf;
10896
10897 in = ctxt->input;
10898 if (in == NULL) return(-1);
10899 base = in->cur - in->base;
10900 if (base < 0) return(-1);
10901 if (ctxt->checkIndex > base)
10902 base = ctxt->checkIndex;
10903 if (in->buf == NULL) {
10904 buf = in->base;
10905 len = in->length;
10906 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010907 buf = xmlBufContent(in->buf->buffer);
10908 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010909 }
10910 /* take into account the sequence length */
10911 if (third) len -= 2;
10912 else if (next) len --;
10913 for (;base < len;base++) {
10914 if (buf[base] == first) {
10915 if (third != 0) {
10916 if ((buf[base + 1] != next) ||
10917 (buf[base + 2] != third)) continue;
10918 } else if (next != 0) {
10919 if (buf[base + 1] != next) continue;
10920 }
10921 ctxt->checkIndex = 0;
10922#ifdef DEBUG_PUSH
10923 if (next == 0)
10924 xmlGenericError(xmlGenericErrorContext,
10925 "PP: lookup '%c' found at %d\n",
10926 first, base);
10927 else if (third == 0)
10928 xmlGenericError(xmlGenericErrorContext,
10929 "PP: lookup '%c%c' found at %d\n",
10930 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010931 else
Owen Taylor3473f882001-02-23 17:55:21 +000010932 xmlGenericError(xmlGenericErrorContext,
10933 "PP: lookup '%c%c%c' found at %d\n",
10934 first, next, third, base);
10935#endif
10936 return(base - (in->cur - in->base));
10937 }
10938 }
10939 ctxt->checkIndex = base;
10940#ifdef DEBUG_PUSH
10941 if (next == 0)
10942 xmlGenericError(xmlGenericErrorContext,
10943 "PP: lookup '%c' failed\n", first);
10944 else if (third == 0)
10945 xmlGenericError(xmlGenericErrorContext,
10946 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010947 else
Owen Taylor3473f882001-02-23 17:55:21 +000010948 xmlGenericError(xmlGenericErrorContext,
10949 "PP: lookup '%c%c%c' failed\n", first, next, third);
10950#endif
10951 return(-1);
10952}
10953
10954/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010955 * xmlParseGetLasts:
10956 * @ctxt: an XML parser context
10957 * @lastlt: pointer to store the last '<' from the input
10958 * @lastgt: pointer to store the last '>' from the input
10959 *
10960 * Lookup the last < and > in the current chunk
10961 */
10962static void
10963xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10964 const xmlChar **lastgt) {
10965 const xmlChar *tmp;
10966
10967 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10968 xmlGenericError(xmlGenericErrorContext,
10969 "Internal error: xmlParseGetLasts\n");
10970 return;
10971 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010972 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010973 tmp = ctxt->input->end;
10974 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010975 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010976 if (tmp < ctxt->input->base) {
10977 *lastlt = NULL;
10978 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010979 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010980 *lastlt = tmp;
10981 tmp++;
10982 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10983 if (*tmp == '\'') {
10984 tmp++;
10985 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10986 if (tmp < ctxt->input->end) tmp++;
10987 } else if (*tmp == '"') {
10988 tmp++;
10989 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10990 if (tmp < ctxt->input->end) tmp++;
10991 } else
10992 tmp++;
10993 }
10994 if (tmp < ctxt->input->end)
10995 *lastgt = tmp;
10996 else {
10997 tmp = *lastlt;
10998 tmp--;
10999 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11000 if (tmp >= ctxt->input->base)
11001 *lastgt = tmp;
11002 else
11003 *lastgt = NULL;
11004 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011005 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011006 } else {
11007 *lastlt = NULL;
11008 *lastgt = NULL;
11009 }
11010}
11011/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011012 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011013 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011014 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011015 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011016 *
11017 * Check that the block of characters is okay as SCdata content [20]
11018 *
11019 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011020 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011021 */
11022static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011023xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011024 int ix;
11025 unsigned char c;
11026 int codepoint;
11027
11028 if ((utf == NULL) || (len <= 0))
11029 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011030
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011031 for (ix = 0; ix < len;) { /* string is 0-terminated */
11032 c = utf[ix];
11033 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11034 if (c >= 0x20)
11035 ix++;
11036 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11037 ix++;
11038 else
11039 return(-ix);
11040 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011041 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011042 if ((utf[ix+1] & 0xc0 ) != 0x80)
11043 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011044 codepoint = (utf[ix] & 0x1f) << 6;
11045 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011046 if (!xmlIsCharQ(codepoint))
11047 return(-ix);
11048 ix += 2;
11049 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011050 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011051 if (((utf[ix+1] & 0xc0) != 0x80) ||
11052 ((utf[ix+2] & 0xc0) != 0x80))
11053 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011054 codepoint = (utf[ix] & 0xf) << 12;
11055 codepoint |= (utf[ix+1] & 0x3f) << 6;
11056 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011057 if (!xmlIsCharQ(codepoint))
11058 return(-ix);
11059 ix += 3;
11060 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011061 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011062 if (((utf[ix+1] & 0xc0) != 0x80) ||
11063 ((utf[ix+2] & 0xc0) != 0x80) ||
11064 ((utf[ix+3] & 0xc0) != 0x80))
11065 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011066 codepoint = (utf[ix] & 0x7) << 18;
11067 codepoint |= (utf[ix+1] & 0x3f) << 12;
11068 codepoint |= (utf[ix+2] & 0x3f) << 6;
11069 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011070 if (!xmlIsCharQ(codepoint))
11071 return(-ix);
11072 ix += 4;
11073 } else /* unknown encoding */
11074 return(-ix);
11075 }
11076 return(ix);
11077}
11078
11079/**
Owen Taylor3473f882001-02-23 17:55:21 +000011080 * xmlParseTryOrFinish:
11081 * @ctxt: an XML parser context
11082 * @terminate: last chunk indicator
11083 *
11084 * Try to progress on parsing
11085 *
11086 * Returns zero if no parsing was possible
11087 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011088static int
Owen Taylor3473f882001-02-23 17:55:21 +000011089xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11090 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011091 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011092 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011093 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011094
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011095 if (ctxt->input == NULL)
11096 return(0);
11097
Owen Taylor3473f882001-02-23 17:55:21 +000011098#ifdef DEBUG_PUSH
11099 switch (ctxt->instate) {
11100 case XML_PARSER_EOF:
11101 xmlGenericError(xmlGenericErrorContext,
11102 "PP: try EOF\n"); break;
11103 case XML_PARSER_START:
11104 xmlGenericError(xmlGenericErrorContext,
11105 "PP: try START\n"); break;
11106 case XML_PARSER_MISC:
11107 xmlGenericError(xmlGenericErrorContext,
11108 "PP: try MISC\n");break;
11109 case XML_PARSER_COMMENT:
11110 xmlGenericError(xmlGenericErrorContext,
11111 "PP: try COMMENT\n");break;
11112 case XML_PARSER_PROLOG:
11113 xmlGenericError(xmlGenericErrorContext,
11114 "PP: try PROLOG\n");break;
11115 case XML_PARSER_START_TAG:
11116 xmlGenericError(xmlGenericErrorContext,
11117 "PP: try START_TAG\n");break;
11118 case XML_PARSER_CONTENT:
11119 xmlGenericError(xmlGenericErrorContext,
11120 "PP: try CONTENT\n");break;
11121 case XML_PARSER_CDATA_SECTION:
11122 xmlGenericError(xmlGenericErrorContext,
11123 "PP: try CDATA_SECTION\n");break;
11124 case XML_PARSER_END_TAG:
11125 xmlGenericError(xmlGenericErrorContext,
11126 "PP: try END_TAG\n");break;
11127 case XML_PARSER_ENTITY_DECL:
11128 xmlGenericError(xmlGenericErrorContext,
11129 "PP: try ENTITY_DECL\n");break;
11130 case XML_PARSER_ENTITY_VALUE:
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: try ENTITY_VALUE\n");break;
11133 case XML_PARSER_ATTRIBUTE_VALUE:
11134 xmlGenericError(xmlGenericErrorContext,
11135 "PP: try ATTRIBUTE_VALUE\n");break;
11136 case XML_PARSER_DTD:
11137 xmlGenericError(xmlGenericErrorContext,
11138 "PP: try DTD\n");break;
11139 case XML_PARSER_EPILOG:
11140 xmlGenericError(xmlGenericErrorContext,
11141 "PP: try EPILOG\n");break;
11142 case XML_PARSER_PI:
11143 xmlGenericError(xmlGenericErrorContext,
11144 "PP: try PI\n");break;
11145 case XML_PARSER_IGNORE:
11146 xmlGenericError(xmlGenericErrorContext,
11147 "PP: try IGNORE\n");break;
11148 }
11149#endif
11150
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011151 if ((ctxt->input != NULL) &&
11152 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011153 xmlSHRINK(ctxt);
11154 ctxt->checkIndex = 0;
11155 }
11156 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011157
Daniel Veillarde50ba812013-04-11 15:54:51 +080011158 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011159 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011160 return(0);
11161
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011162 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011163 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011164 avail = ctxt->input->length -
11165 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011166 else {
11167 /*
11168 * If we are operating on converted input, try to flush
11169 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011170 * buffer. But do not do this in document start where
11171 * encoding="..." may not have been read and we work on a
11172 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011173 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011174 if ((ctxt->instate != XML_PARSER_START) &&
11175 (ctxt->input->buf->raw != NULL) &&
11176 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011177 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11178 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011179 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011180
11181 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011182 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11183 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011184 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011185 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011186 (ctxt->input->cur - ctxt->input->base);
11187 }
Owen Taylor3473f882001-02-23 17:55:21 +000011188 if (avail < 1)
11189 goto done;
11190 switch (ctxt->instate) {
11191 case XML_PARSER_EOF:
11192 /*
11193 * Document parsing is done !
11194 */
11195 goto done;
11196 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011197 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11198 xmlChar start[4];
11199 xmlCharEncoding enc;
11200
11201 /*
11202 * Very first chars read from the document flow.
11203 */
11204 if (avail < 4)
11205 goto done;
11206
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011207 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011208 * Get the 4 first bytes and decode the charset
11209 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011210 * plug some encoding conversion routines,
11211 * else xmlSwitchEncoding will set to (default)
11212 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011213 */
11214 start[0] = RAW;
11215 start[1] = NXT(1);
11216 start[2] = NXT(2);
11217 start[3] = NXT(3);
11218 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011219 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011220 break;
11221 }
Owen Taylor3473f882001-02-23 17:55:21 +000011222
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011223 if (avail < 2)
11224 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011225 cur = ctxt->input->cur[0];
11226 next = ctxt->input->cur[1];
11227 if (cur == 0) {
11228 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11229 ctxt->sax->setDocumentLocator(ctxt->userData,
11230 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011231 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011232 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011233#ifdef DEBUG_PUSH
11234 xmlGenericError(xmlGenericErrorContext,
11235 "PP: entering EOF\n");
11236#endif
11237 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11238 ctxt->sax->endDocument(ctxt->userData);
11239 goto done;
11240 }
11241 if ((cur == '<') && (next == '?')) {
11242 /* PI or XML decl */
11243 if (avail < 5) return(ret);
11244 if ((!terminate) &&
11245 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11246 return(ret);
11247 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11248 ctxt->sax->setDocumentLocator(ctxt->userData,
11249 &xmlDefaultSAXLocator);
11250 if ((ctxt->input->cur[2] == 'x') &&
11251 (ctxt->input->cur[3] == 'm') &&
11252 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011253 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011254 ret += 5;
11255#ifdef DEBUG_PUSH
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: Parsing XML Decl\n");
11258#endif
11259 xmlParseXMLDecl(ctxt);
11260 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11261 /*
11262 * The XML REC instructs us to stop parsing right
11263 * here
11264 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011265 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011266 return(0);
11267 }
11268 ctxt->standalone = ctxt->input->standalone;
11269 if ((ctxt->encoding == NULL) &&
11270 (ctxt->input->encoding != NULL))
11271 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11272 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11273 (!ctxt->disableSAX))
11274 ctxt->sax->startDocument(ctxt->userData);
11275 ctxt->instate = XML_PARSER_MISC;
11276#ifdef DEBUG_PUSH
11277 xmlGenericError(xmlGenericErrorContext,
11278 "PP: entering MISC\n");
11279#endif
11280 } else {
11281 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11282 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11283 (!ctxt->disableSAX))
11284 ctxt->sax->startDocument(ctxt->userData);
11285 ctxt->instate = XML_PARSER_MISC;
11286#ifdef DEBUG_PUSH
11287 xmlGenericError(xmlGenericErrorContext,
11288 "PP: entering MISC\n");
11289#endif
11290 }
11291 } else {
11292 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11293 ctxt->sax->setDocumentLocator(ctxt->userData,
11294 &xmlDefaultSAXLocator);
11295 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011296 if (ctxt->version == NULL) {
11297 xmlErrMemory(ctxt, NULL);
11298 break;
11299 }
Owen Taylor3473f882001-02-23 17:55:21 +000011300 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11301 (!ctxt->disableSAX))
11302 ctxt->sax->startDocument(ctxt->userData);
11303 ctxt->instate = XML_PARSER_MISC;
11304#ifdef DEBUG_PUSH
11305 xmlGenericError(xmlGenericErrorContext,
11306 "PP: entering MISC\n");
11307#endif
11308 }
11309 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011310 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011311 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011312 const xmlChar *prefix = NULL;
11313 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011314 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011315
11316 if ((avail < 2) && (ctxt->inputNr == 1))
11317 goto done;
11318 cur = ctxt->input->cur[0];
11319 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011320 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011321 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011322 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11323 ctxt->sax->endDocument(ctxt->userData);
11324 goto done;
11325 }
11326 if (!terminate) {
11327 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011328 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011329 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011330 goto done;
11331 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11332 goto done;
11333 }
11334 }
11335 if (ctxt->spaceNr == 0)
11336 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011337 else if (*ctxt->space == -2)
11338 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011339 else
11340 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011341#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011342 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011343#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011344 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011345#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011346 else
11347 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011348#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011349 if (ctxt->instate == XML_PARSER_EOF)
11350 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011351 if (name == NULL) {
11352 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011353 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011354 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11355 ctxt->sax->endDocument(ctxt->userData);
11356 goto done;
11357 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011358#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011359 /*
11360 * [ VC: Root Element Type ]
11361 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011362 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011363 */
11364 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11365 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11366 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011367#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011368
11369 /*
11370 * Check for an Empty Element.
11371 */
11372 if ((RAW == '/') && (NXT(1) == '>')) {
11373 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011374
11375 if (ctxt->sax2) {
11376 if ((ctxt->sax != NULL) &&
11377 (ctxt->sax->endElementNs != NULL) &&
11378 (!ctxt->disableSAX))
11379 ctxt->sax->endElementNs(ctxt->userData, name,
11380 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011381 if (ctxt->nsNr - nsNr > 0)
11382 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011383#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011384 } else {
11385 if ((ctxt->sax != NULL) &&
11386 (ctxt->sax->endElement != NULL) &&
11387 (!ctxt->disableSAX))
11388 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011389#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011390 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011391 if (ctxt->instate == XML_PARSER_EOF)
11392 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011393 spacePop(ctxt);
11394 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011395 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011396 } else {
11397 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011398 }
Daniel Veillard65686452012-07-19 18:25:01 +080011399 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011400 break;
11401 }
11402 if (RAW == '>') {
11403 NEXT;
11404 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011405 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011406 "Couldn't find end of Start Tag %s\n",
11407 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011408 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011409 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011410 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011411 if (ctxt->sax2)
11412 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011413#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011414 else
11415 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011416#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011417
Daniel Veillarda880b122003-04-21 21:36:41 +000011418 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011419 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011420 break;
11421 }
11422 case XML_PARSER_CONTENT: {
11423 const xmlChar *test;
11424 unsigned int cons;
11425 if ((avail < 2) && (ctxt->inputNr == 1))
11426 goto done;
11427 cur = ctxt->input->cur[0];
11428 next = ctxt->input->cur[1];
11429
11430 test = CUR_PTR;
11431 cons = ctxt->input->consumed;
11432 if ((cur == '<') && (next == '/')) {
11433 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011434 break;
11435 } else if ((cur == '<') && (next == '?')) {
11436 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011437 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11438 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011439 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011440 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011441 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011442 ctxt->instate = XML_PARSER_CONTENT;
11443 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011444 } else if ((cur == '<') && (next != '!')) {
11445 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011446 break;
11447 } else if ((cur == '<') && (next == '!') &&
11448 (ctxt->input->cur[2] == '-') &&
11449 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011450 int term;
11451
11452 if (avail < 4)
11453 goto done;
11454 ctxt->input->cur += 4;
11455 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11456 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011457 if ((!terminate) && (term < 0)) {
11458 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011459 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011460 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011461 xmlParseComment(ctxt);
11462 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011463 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011464 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11465 (ctxt->input->cur[2] == '[') &&
11466 (ctxt->input->cur[3] == 'C') &&
11467 (ctxt->input->cur[4] == 'D') &&
11468 (ctxt->input->cur[5] == 'A') &&
11469 (ctxt->input->cur[6] == 'T') &&
11470 (ctxt->input->cur[7] == 'A') &&
11471 (ctxt->input->cur[8] == '[')) {
11472 SKIP(9);
11473 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011474 break;
11475 } else if ((cur == '<') && (next == '!') &&
11476 (avail < 9)) {
11477 goto done;
11478 } else if (cur == '&') {
11479 if ((!terminate) &&
11480 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11481 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011482 xmlParseReference(ctxt);
11483 } else {
11484 /* TODO Avoid the extra copy, handle directly !!! */
11485 /*
11486 * Goal of the following test is:
11487 * - minimize calls to the SAX 'character' callback
11488 * when they are mergeable
11489 * - handle a problem for isBlank when we only parse
11490 * a sequence of blank chars and the next one is
11491 * not available to check against '<' presence.
11492 * - tries to homogenize the differences in SAX
11493 * callbacks between the push and pull versions
11494 * of the parser.
11495 */
11496 if ((ctxt->inputNr == 1) &&
11497 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11498 if (!terminate) {
11499 if (ctxt->progressive) {
11500 if ((lastlt == NULL) ||
11501 (ctxt->input->cur > lastlt))
11502 goto done;
11503 } else if (xmlParseLookupSequence(ctxt,
11504 '<', 0, 0) < 0) {
11505 goto done;
11506 }
11507 }
11508 }
11509 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011510 xmlParseCharData(ctxt, 0);
11511 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011512 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011513 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11514 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011515 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011516 break;
11517 }
11518 break;
11519 }
11520 case XML_PARSER_END_TAG:
11521 if (avail < 2)
11522 goto done;
11523 if (!terminate) {
11524 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011525 /* > can be found unescaped in attribute values */
11526 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011527 goto done;
11528 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11529 goto done;
11530 }
11531 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011532 if (ctxt->sax2) {
11533 xmlParseEndTag2(ctxt,
11534 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11535 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011536 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011537 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011538 }
11539#ifdef LIBXML_SAX1_ENABLED
11540 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011541 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011542#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011543 if (ctxt->instate == XML_PARSER_EOF) {
11544 /* Nothing */
11545 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011546 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011547 } else {
11548 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011549 }
11550 break;
11551 case XML_PARSER_CDATA_SECTION: {
11552 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011553 * The Push mode needs to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011554 * cdataBlock merge back contiguous callbacks.
11555 */
11556 int base;
11557
11558 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11559 if (base < 0) {
11560 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011561 int tmp;
11562
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011563 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011564 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011565 if (tmp < 0) {
11566 tmp = -tmp;
11567 ctxt->input->cur += tmp;
11568 goto encoding_error;
11569 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011570 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11571 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011572 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011573 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011574 else if (ctxt->sax->characters != NULL)
11575 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011576 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011577 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011578 if (ctxt->instate == XML_PARSER_EOF)
11579 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011580 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011581 ctxt->checkIndex = 0;
11582 }
11583 goto done;
11584 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011585 int tmp;
11586
David Kilzer4f8606c2016-01-05 13:38:09 -080011587 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011588 if ((tmp < 0) || (tmp != base)) {
11589 tmp = -tmp;
11590 ctxt->input->cur += tmp;
11591 goto encoding_error;
11592 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011593 if ((ctxt->sax != NULL) && (base == 0) &&
11594 (ctxt->sax->cdataBlock != NULL) &&
11595 (!ctxt->disableSAX)) {
11596 /*
11597 * Special case to provide identical behaviour
11598 * between pull and push parsers on empty CDATA
11599 * sections
11600 */
11601 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11602 (!strncmp((const char *)&ctxt->input->cur[-9],
11603 "<![CDATA[", 9)))
11604 ctxt->sax->cdataBlock(ctxt->userData,
11605 BAD_CAST "", 0);
11606 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011607 (!ctxt->disableSAX)) {
11608 if (ctxt->sax->cdataBlock != NULL)
11609 ctxt->sax->cdataBlock(ctxt->userData,
11610 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011611 else if (ctxt->sax->characters != NULL)
11612 ctxt->sax->characters(ctxt->userData,
11613 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011614 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011615 if (ctxt->instate == XML_PARSER_EOF)
11616 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011617 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011618 ctxt->checkIndex = 0;
11619 ctxt->instate = XML_PARSER_CONTENT;
11620#ifdef DEBUG_PUSH
11621 xmlGenericError(xmlGenericErrorContext,
11622 "PP: entering CONTENT\n");
11623#endif
11624 }
11625 break;
11626 }
Owen Taylor3473f882001-02-23 17:55:21 +000011627 case XML_PARSER_MISC:
11628 SKIP_BLANKS;
11629 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011630 avail = ctxt->input->length -
11631 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011632 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011633 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011634 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011635 if (avail < 2)
11636 goto done;
11637 cur = ctxt->input->cur[0];
11638 next = ctxt->input->cur[1];
11639 if ((cur == '<') && (next == '?')) {
11640 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011641 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11642 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011643 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011644 }
Owen Taylor3473f882001-02-23 17:55:21 +000011645#ifdef DEBUG_PUSH
11646 xmlGenericError(xmlGenericErrorContext,
11647 "PP: Parsing PI\n");
11648#endif
11649 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011650 if (ctxt->instate == XML_PARSER_EOF)
11651 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011652 ctxt->instate = XML_PARSER_MISC;
11653 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011654 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011655 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011656 (ctxt->input->cur[2] == '-') &&
11657 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011658 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011659 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11660 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011661 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011662 }
Owen Taylor3473f882001-02-23 17:55:21 +000011663#ifdef DEBUG_PUSH
11664 xmlGenericError(xmlGenericErrorContext,
11665 "PP: Parsing Comment\n");
11666#endif
11667 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011668 if (ctxt->instate == XML_PARSER_EOF)
11669 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011670 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011671 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011672 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011673 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011674 (ctxt->input->cur[2] == 'D') &&
11675 (ctxt->input->cur[3] == 'O') &&
11676 (ctxt->input->cur[4] == 'C') &&
11677 (ctxt->input->cur[5] == 'T') &&
11678 (ctxt->input->cur[6] == 'Y') &&
11679 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011680 (ctxt->input->cur[8] == 'E')) {
11681 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011682 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11683 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011684 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011685 }
Owen Taylor3473f882001-02-23 17:55:21 +000011686#ifdef DEBUG_PUSH
11687 xmlGenericError(xmlGenericErrorContext,
11688 "PP: Parsing internal subset\n");
11689#endif
11690 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011691 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011692 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011693 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011694 if (ctxt->instate == XML_PARSER_EOF)
11695 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011696 if (RAW == '[') {
11697 ctxt->instate = XML_PARSER_DTD;
11698#ifdef DEBUG_PUSH
11699 xmlGenericError(xmlGenericErrorContext,
11700 "PP: entering DTD\n");
11701#endif
11702 } else {
11703 /*
11704 * Create and update the external subset.
11705 */
11706 ctxt->inSubset = 2;
11707 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11708 (ctxt->sax->externalSubset != NULL))
11709 ctxt->sax->externalSubset(ctxt->userData,
11710 ctxt->intSubName, ctxt->extSubSystem,
11711 ctxt->extSubURI);
11712 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011713 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011714 ctxt->instate = XML_PARSER_PROLOG;
11715#ifdef DEBUG_PUSH
11716 xmlGenericError(xmlGenericErrorContext,
11717 "PP: entering PROLOG\n");
11718#endif
11719 }
11720 } else if ((cur == '<') && (next == '!') &&
11721 (avail < 9)) {
11722 goto done;
11723 } else {
11724 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011725 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011726 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011727#ifdef DEBUG_PUSH
11728 xmlGenericError(xmlGenericErrorContext,
11729 "PP: entering START_TAG\n");
11730#endif
11731 }
11732 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011733 case XML_PARSER_PROLOG:
11734 SKIP_BLANKS;
11735 if (ctxt->input->buf == NULL)
11736 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11737 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011738 avail = xmlBufUse(ctxt->input->buf->buffer) -
11739 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011740 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011741 goto done;
11742 cur = ctxt->input->cur[0];
11743 next = ctxt->input->cur[1];
11744 if ((cur == '<') && (next == '?')) {
11745 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011746 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11747 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011748 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011749 }
Owen Taylor3473f882001-02-23 17:55:21 +000011750#ifdef DEBUG_PUSH
11751 xmlGenericError(xmlGenericErrorContext,
11752 "PP: Parsing PI\n");
11753#endif
11754 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011755 if (ctxt->instate == XML_PARSER_EOF)
11756 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011757 ctxt->instate = XML_PARSER_PROLOG;
11758 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011759 } else if ((cur == '<') && (next == '!') &&
11760 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11761 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011762 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11763 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011764 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011765 }
Owen Taylor3473f882001-02-23 17:55:21 +000011766#ifdef DEBUG_PUSH
11767 xmlGenericError(xmlGenericErrorContext,
11768 "PP: Parsing Comment\n");
11769#endif
11770 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011771 if (ctxt->instate == XML_PARSER_EOF)
11772 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011773 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011774 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011775 } else if ((cur == '<') && (next == '!') &&
11776 (avail < 4)) {
11777 goto done;
11778 } else {
11779 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011780 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011781 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011782 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011783#ifdef DEBUG_PUSH
11784 xmlGenericError(xmlGenericErrorContext,
11785 "PP: entering START_TAG\n");
11786#endif
11787 }
11788 break;
11789 case XML_PARSER_EPILOG:
11790 SKIP_BLANKS;
11791 if (ctxt->input->buf == NULL)
11792 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11793 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011794 avail = xmlBufUse(ctxt->input->buf->buffer) -
11795 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011796 if (avail < 2)
11797 goto done;
11798 cur = ctxt->input->cur[0];
11799 next = ctxt->input->cur[1];
11800 if ((cur == '<') && (next == '?')) {
11801 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011802 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11803 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011804 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011805 }
Owen Taylor3473f882001-02-23 17:55:21 +000011806#ifdef DEBUG_PUSH
11807 xmlGenericError(xmlGenericErrorContext,
11808 "PP: Parsing PI\n");
11809#endif
11810 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011811 if (ctxt->instate == XML_PARSER_EOF)
11812 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011813 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011814 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011815 } else if ((cur == '<') && (next == '!') &&
11816 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11817 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011818 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11819 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011820 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011821 }
Owen Taylor3473f882001-02-23 17:55:21 +000011822#ifdef DEBUG_PUSH
11823 xmlGenericError(xmlGenericErrorContext,
11824 "PP: Parsing Comment\n");
11825#endif
11826 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011827 if (ctxt->instate == XML_PARSER_EOF)
11828 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011829 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011830 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011831 } else if ((cur == '<') && (next == '!') &&
11832 (avail < 4)) {
11833 goto done;
11834 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011835 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011836 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011837#ifdef DEBUG_PUSH
11838 xmlGenericError(xmlGenericErrorContext,
11839 "PP: entering EOF\n");
11840#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011841 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011842 ctxt->sax->endDocument(ctxt->userData);
11843 goto done;
11844 }
11845 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011846 case XML_PARSER_DTD: {
11847 /*
11848 * Sorry but progressive parsing of the internal subset
11849 * is not expected to be supported. We first check that
11850 * the full content of the internal subset is available and
11851 * the parsing is launched only at that point.
11852 * Internal subset ends up with "']' S? '>'" in an unescaped
11853 * section and not in a ']]>' sequence which are conditional
11854 * sections (whoever argued to keep that crap in XML deserves
11855 * a place in hell !).
11856 */
11857 int base, i;
11858 xmlChar *buf;
11859 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011860 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011861
11862 base = ctxt->input->cur - ctxt->input->base;
11863 if (base < 0) return(0);
11864 if (ctxt->checkIndex > base)
11865 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011866 buf = xmlBufContent(ctxt->input->buf->buffer);
11867 use = xmlBufUse(ctxt->input->buf->buffer);
11868 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011869 if (quote != 0) {
11870 if (buf[base] == quote)
11871 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011872 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011873 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011874 if ((quote == 0) && (buf[base] == '<')) {
11875 int found = 0;
11876 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011877 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011878 (buf[base + 1] == '!') &&
11879 (buf[base + 2] == '-') &&
11880 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011881 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011882 if ((buf[base] == '-') &&
11883 (buf[base + 1] == '-') &&
11884 (buf[base + 2] == '>')) {
11885 found = 1;
11886 base += 2;
11887 break;
11888 }
11889 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011890 if (!found) {
11891#if 0
11892 fprintf(stderr, "unfinished comment\n");
11893#endif
11894 break; /* for */
11895 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011896 continue;
11897 }
11898 }
Owen Taylor3473f882001-02-23 17:55:21 +000011899 if (buf[base] == '"') {
11900 quote = '"';
11901 continue;
11902 }
11903 if (buf[base] == '\'') {
11904 quote = '\'';
11905 continue;
11906 }
11907 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011908#if 0
11909 fprintf(stderr, "%c%c%c%c: ", buf[base],
11910 buf[base + 1], buf[base + 2], buf[base + 3]);
11911#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011912 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011913 break;
11914 if (buf[base + 1] == ']') {
11915 /* conditional crap, skip both ']' ! */
11916 base++;
11917 continue;
11918 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011919 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011920 if (buf[base + i] == '>') {
11921#if 0
11922 fprintf(stderr, "found\n");
11923#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011924 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011925 }
11926 if (!IS_BLANK_CH(buf[base + i])) {
11927#if 0
11928 fprintf(stderr, "not found\n");
11929#endif
11930 goto not_end_of_int_subset;
11931 }
Owen Taylor3473f882001-02-23 17:55:21 +000011932 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011933#if 0
11934 fprintf(stderr, "end of stream\n");
11935#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011936 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011937
Owen Taylor3473f882001-02-23 17:55:21 +000011938 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011939not_end_of_int_subset:
11940 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011941 }
11942 /*
11943 * We didn't find the end of the Internal subset
11944 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011945 if (quote == 0)
11946 ctxt->checkIndex = base;
11947 else
11948 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011949#ifdef DEBUG_PUSH
11950 if (next == 0)
11951 xmlGenericError(xmlGenericErrorContext,
11952 "PP: lookup of int subset end filed\n");
11953#endif
11954 goto done;
11955
11956found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011957 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011958 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011959 if (ctxt->instate == XML_PARSER_EOF)
11960 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011961 ctxt->inSubset = 2;
11962 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11963 (ctxt->sax->externalSubset != NULL))
11964 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11965 ctxt->extSubSystem, ctxt->extSubURI);
11966 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011967 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011968 if (ctxt->instate == XML_PARSER_EOF)
11969 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011970 ctxt->instate = XML_PARSER_PROLOG;
11971 ctxt->checkIndex = 0;
11972#ifdef DEBUG_PUSH
11973 xmlGenericError(xmlGenericErrorContext,
11974 "PP: entering PROLOG\n");
11975#endif
11976 break;
11977 }
11978 case XML_PARSER_COMMENT:
11979 xmlGenericError(xmlGenericErrorContext,
11980 "PP: internal error, state == COMMENT\n");
11981 ctxt->instate = XML_PARSER_CONTENT;
11982#ifdef DEBUG_PUSH
11983 xmlGenericError(xmlGenericErrorContext,
11984 "PP: entering CONTENT\n");
11985#endif
11986 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011987 case XML_PARSER_IGNORE:
11988 xmlGenericError(xmlGenericErrorContext,
11989 "PP: internal error, state == IGNORE");
11990 ctxt->instate = XML_PARSER_DTD;
11991#ifdef DEBUG_PUSH
11992 xmlGenericError(xmlGenericErrorContext,
11993 "PP: entering DTD\n");
11994#endif
11995 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011996 case XML_PARSER_PI:
11997 xmlGenericError(xmlGenericErrorContext,
11998 "PP: internal error, state == PI\n");
11999 ctxt->instate = XML_PARSER_CONTENT;
12000#ifdef DEBUG_PUSH
12001 xmlGenericError(xmlGenericErrorContext,
12002 "PP: entering CONTENT\n");
12003#endif
12004 break;
12005 case XML_PARSER_ENTITY_DECL:
12006 xmlGenericError(xmlGenericErrorContext,
12007 "PP: internal error, state == ENTITY_DECL\n");
12008 ctxt->instate = XML_PARSER_DTD;
12009#ifdef DEBUG_PUSH
12010 xmlGenericError(xmlGenericErrorContext,
12011 "PP: entering DTD\n");
12012#endif
12013 break;
12014 case XML_PARSER_ENTITY_VALUE:
12015 xmlGenericError(xmlGenericErrorContext,
12016 "PP: internal error, state == ENTITY_VALUE\n");
12017 ctxt->instate = XML_PARSER_CONTENT;
12018#ifdef DEBUG_PUSH
12019 xmlGenericError(xmlGenericErrorContext,
12020 "PP: entering DTD\n");
12021#endif
12022 break;
12023 case XML_PARSER_ATTRIBUTE_VALUE:
12024 xmlGenericError(xmlGenericErrorContext,
12025 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12026 ctxt->instate = XML_PARSER_START_TAG;
12027#ifdef DEBUG_PUSH
12028 xmlGenericError(xmlGenericErrorContext,
12029 "PP: entering START_TAG\n");
12030#endif
12031 break;
12032 case XML_PARSER_SYSTEM_LITERAL:
12033 xmlGenericError(xmlGenericErrorContext,
12034 "PP: internal error, state == SYSTEM_LITERAL\n");
12035 ctxt->instate = XML_PARSER_START_TAG;
12036#ifdef DEBUG_PUSH
12037 xmlGenericError(xmlGenericErrorContext,
12038 "PP: entering START_TAG\n");
12039#endif
12040 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012041 case XML_PARSER_PUBLIC_LITERAL:
12042 xmlGenericError(xmlGenericErrorContext,
12043 "PP: internal error, state == PUBLIC_LITERAL\n");
12044 ctxt->instate = XML_PARSER_START_TAG;
12045#ifdef DEBUG_PUSH
12046 xmlGenericError(xmlGenericErrorContext,
12047 "PP: entering START_TAG\n");
12048#endif
12049 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012050 }
12051 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012052done:
Owen Taylor3473f882001-02-23 17:55:21 +000012053#ifdef DEBUG_PUSH
12054 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12055#endif
12056 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012057encoding_error:
12058 {
12059 char buffer[150];
12060
12061 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12062 ctxt->input->cur[0], ctxt->input->cur[1],
12063 ctxt->input->cur[2], ctxt->input->cur[3]);
12064 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12065 "Input is not proper UTF-8, indicate encoding !\n%s",
12066 BAD_CAST buffer, NULL);
12067 }
12068 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012069}
12070
12071/**
Daniel Veillard65686452012-07-19 18:25:01 +080012072 * xmlParseCheckTransition:
12073 * @ctxt: an XML parser context
12074 * @chunk: a char array
12075 * @size: the size in byte of the chunk
12076 *
12077 * Check depending on the current parser state if the chunk given must be
12078 * processed immediately or one need more data to advance on parsing.
12079 *
12080 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12081 */
12082static int
12083xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12084 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12085 return(-1);
12086 if (ctxt->instate == XML_PARSER_START_TAG) {
12087 if (memchr(chunk, '>', size) != NULL)
12088 return(1);
12089 return(0);
12090 }
12091 if (ctxt->progressive == XML_PARSER_COMMENT) {
12092 if (memchr(chunk, '>', size) != NULL)
12093 return(1);
12094 return(0);
12095 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012096 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12097 if (memchr(chunk, '>', size) != NULL)
12098 return(1);
12099 return(0);
12100 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012101 if (ctxt->progressive == XML_PARSER_PI) {
12102 if (memchr(chunk, '>', size) != NULL)
12103 return(1);
12104 return(0);
12105 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012106 if (ctxt->instate == XML_PARSER_END_TAG) {
12107 if (memchr(chunk, '>', size) != NULL)
12108 return(1);
12109 return(0);
12110 }
12111 if ((ctxt->progressive == XML_PARSER_DTD) ||
12112 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012113 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012114 return(1);
12115 return(0);
12116 }
Daniel Veillard65686452012-07-19 18:25:01 +080012117 return(1);
12118}
12119
12120/**
Owen Taylor3473f882001-02-23 17:55:21 +000012121 * xmlParseChunk:
12122 * @ctxt: an XML parser context
12123 * @chunk: an char array
12124 * @size: the size in byte of the chunk
12125 * @terminate: last chunk indicator
12126 *
12127 * Parse a Chunk of memory
12128 *
12129 * Returns zero if no error, the xmlParserErrors otherwise.
12130 */
12131int
12132xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12133 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012134 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012135 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012136 size_t old_avail = 0;
12137 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012138
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012139 if (ctxt == NULL)
12140 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012141 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012142 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012143 if (ctxt->instate == XML_PARSER_EOF)
12144 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012145 if (ctxt->instate == XML_PARSER_START)
12146 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012147 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12148 (chunk[size - 1] == '\r')) {
12149 end_in_lf = 1;
12150 size--;
12151 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012152
12153xmldecl_done:
12154
Owen Taylor3473f882001-02-23 17:55:21 +000012155 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12156 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012157 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12158 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012159 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012160
Daniel Veillard65686452012-07-19 18:25:01 +080012161 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012162 /*
12163 * Specific handling if we autodetected an encoding, we should not
12164 * push more than the first line ... which depend on the encoding
12165 * And only push the rest once the final encoding was detected
12166 */
12167 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12168 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012169 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012170
12171 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12172 BAD_CAST "UTF-16")) ||
12173 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12174 BAD_CAST "UTF16")))
12175 len = 90;
12176 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177 BAD_CAST "UCS-4")) ||
12178 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12179 BAD_CAST "UCS4")))
12180 len = 180;
12181
12182 if (ctxt->input->buf->rawconsumed < len)
12183 len -= ctxt->input->buf->rawconsumed;
12184
Raul Hudeaba9716a2010-03-15 10:13:29 +010012185 /*
12186 * Change size for reading the initial declaration only
12187 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12188 * will blindly copy extra bytes from memory.
12189 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012190 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012191 remain = size - len;
12192 size = len;
12193 } else {
12194 remain = 0;
12195 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012196 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012197 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012198 if (res < 0) {
12199 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012200 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012201 return (XML_PARSER_EOF);
12202 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012203 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012204#ifdef DEBUG_PUSH
12205 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12206#endif
12207
Owen Taylor3473f882001-02-23 17:55:21 +000012208 } else if (ctxt->instate != XML_PARSER_EOF) {
12209 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12210 xmlParserInputBufferPtr in = ctxt->input->buf;
12211 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12212 (in->raw != NULL)) {
12213 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012214 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12215 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012216
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012217 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012218 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012219 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012220 xmlGenericError(xmlGenericErrorContext,
12221 "xmlParseChunk: encoder error\n");
12222 return(XML_ERR_INVALID_ENCODING);
12223 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012224 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012225 }
12226 }
12227 }
Daniel Veillard65686452012-07-19 18:25:01 +080012228 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012229 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012230 } else {
12231 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12232 avail = xmlBufUse(ctxt->input->buf->buffer);
12233 /*
12234 * Depending on the current state it may not be such
12235 * a good idea to try parsing if there is nothing in the chunk
12236 * which would be worth doing a parser state transition and we
12237 * need to wait for more data
12238 */
12239 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12240 (old_avail == 0) || (avail == 0) ||
12241 (xmlParseCheckTransition(ctxt,
12242 (const char *)&ctxt->input->base[old_avail],
12243 avail - old_avail)))
12244 xmlParseTryOrFinish(ctxt, terminate);
12245 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012246 if (ctxt->instate == XML_PARSER_EOF)
12247 return(ctxt->errNo);
12248
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012249 if ((ctxt->input != NULL) &&
12250 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12251 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12252 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12253 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012254 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012255 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012256 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12257 return(ctxt->errNo);
12258
12259 if (remain != 0) {
12260 chunk += size;
12261 size = remain;
12262 remain = 0;
12263 goto xmldecl_done;
12264 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012265 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12266 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012267 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12268 ctxt->input);
12269 size_t current = ctxt->input->cur - ctxt->input->base;
12270
Daniel Veillarda617e242006-01-09 14:38:44 +000012271 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012272
12273 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12274 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012275 }
Owen Taylor3473f882001-02-23 17:55:21 +000012276 if (terminate) {
12277 /*
12278 * Check for termination
12279 */
Daniel Veillard65686452012-07-19 18:25:01 +080012280 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012281
12282 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012283 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012284 cur_avail = ctxt->input->length -
12285 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012286 else
Daniel Veillard65686452012-07-19 18:25:01 +080012287 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12288 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012289 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012290
Owen Taylor3473f882001-02-23 17:55:21 +000012291 if ((ctxt->instate != XML_PARSER_EOF) &&
12292 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012293 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012294 }
Daniel Veillard65686452012-07-19 18:25:01 +080012295 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012296 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012297 }
Owen Taylor3473f882001-02-23 17:55:21 +000012298 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012299 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012300 ctxt->sax->endDocument(ctxt->userData);
12301 }
12302 ctxt->instate = XML_PARSER_EOF;
12303 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012304 if (ctxt->wellFormed == 0)
12305 return((xmlParserErrors) ctxt->errNo);
12306 else
12307 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012308}
12309
12310/************************************************************************
12311 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012312 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012313 * *
12314 ************************************************************************/
12315
12316/**
Owen Taylor3473f882001-02-23 17:55:21 +000012317 * xmlCreatePushParserCtxt:
12318 * @sax: a SAX handler
12319 * @user_data: The user data returned on SAX callbacks
12320 * @chunk: a pointer to an array of chars
12321 * @size: number of chars in the array
12322 * @filename: an optional file name or URI
12323 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012324 * Create a parser context for using the XML parser in push mode.
12325 * If @buffer and @size are non-NULL, the data is used to detect
12326 * the encoding. The remaining characters will be parsed so they
12327 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012328 * To allow content encoding detection, @size should be >= 4
12329 * The value of @filename is used for fetching external entities
12330 * and error/warning reports.
12331 *
12332 * Returns the new parser context or NULL
12333 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012334
Owen Taylor3473f882001-02-23 17:55:21 +000012335xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012336xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012337 const char *chunk, int size, const char *filename) {
12338 xmlParserCtxtPtr ctxt;
12339 xmlParserInputPtr inputStream;
12340 xmlParserInputBufferPtr buf;
12341 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12342
12343 /*
12344 * plug some encoding conversion routines
12345 */
12346 if ((chunk != NULL) && (size >= 4))
12347 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12348
12349 buf = xmlAllocParserInputBuffer(enc);
12350 if (buf == NULL) return(NULL);
12351
12352 ctxt = xmlNewParserCtxt();
12353 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012354 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012355 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012356 return(NULL);
12357 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012358 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012359 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12360 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012361 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012362 xmlFreeParserInputBuffer(buf);
12363 xmlFreeParserCtxt(ctxt);
12364 return(NULL);
12365 }
Owen Taylor3473f882001-02-23 17:55:21 +000012366 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012367#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012368 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012369#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012370 xmlFree(ctxt->sax);
12371 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12372 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012373 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012374 xmlFreeParserInputBuffer(buf);
12375 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012376 return(NULL);
12377 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012378 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12379 if (sax->initialized == XML_SAX2_MAGIC)
12380 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12381 else
12382 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012383 if (user_data != NULL)
12384 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012385 }
Owen Taylor3473f882001-02-23 17:55:21 +000012386 if (filename == NULL) {
12387 ctxt->directory = NULL;
12388 } else {
12389 ctxt->directory = xmlParserGetDirectory(filename);
12390 }
12391
12392 inputStream = xmlNewInputStream(ctxt);
12393 if (inputStream == NULL) {
12394 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012395 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012396 return(NULL);
12397 }
12398
12399 if (filename == NULL)
12400 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012401 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012402 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012403 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012404 if (inputStream->filename == NULL) {
12405 xmlFreeParserCtxt(ctxt);
12406 xmlFreeParserInputBuffer(buf);
12407 return(NULL);
12408 }
12409 }
Owen Taylor3473f882001-02-23 17:55:21 +000012410 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012411 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012412 inputPush(ctxt, inputStream);
12413
William M. Brack3a1cd212005-02-11 14:35:54 +000012414 /*
12415 * If the caller didn't provide an initial 'chunk' for determining
12416 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12417 * that it can be automatically determined later
12418 */
12419 if ((size == 0) || (chunk == NULL)) {
12420 ctxt->charset = XML_CHAR_ENCODING_NONE;
12421 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012422 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12423 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012424
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012425 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012426
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012427 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012428#ifdef DEBUG_PUSH
12429 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12430#endif
12431 }
12432
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012433 if (enc != XML_CHAR_ENCODING_NONE) {
12434 xmlSwitchEncoding(ctxt, enc);
12435 }
12436
Owen Taylor3473f882001-02-23 17:55:21 +000012437 return(ctxt);
12438}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012439#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012440
12441/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012442 * xmlHaltParser:
12443 * @ctxt: an XML parser context
12444 *
12445 * Blocks further parser processing don't override error
12446 * for internal use
12447 */
12448static void
12449xmlHaltParser(xmlParserCtxtPtr ctxt) {
12450 if (ctxt == NULL)
12451 return;
12452 ctxt->instate = XML_PARSER_EOF;
12453 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012454 while (ctxt->inputNr > 1)
12455 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012456 if (ctxt->input != NULL) {
12457 /*
12458 * in case there was a specific allocation deallocate before
12459 * overriding base
12460 */
12461 if (ctxt->input->free != NULL) {
12462 ctxt->input->free((xmlChar *) ctxt->input->base);
12463 ctxt->input->free = NULL;
12464 }
12465 ctxt->input->cur = BAD_CAST"";
12466 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012467 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012468 }
12469}
12470
12471/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012472 * xmlStopParser:
12473 * @ctxt: an XML parser context
12474 *
12475 * Blocks further parser processing
12476 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012477void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012478xmlStopParser(xmlParserCtxtPtr ctxt) {
12479 if (ctxt == NULL)
12480 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012481 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012482 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012483}
12484
12485/**
Owen Taylor3473f882001-02-23 17:55:21 +000012486 * xmlCreateIOParserCtxt:
12487 * @sax: a SAX handler
12488 * @user_data: The user data returned on SAX callbacks
12489 * @ioread: an I/O read function
12490 * @ioclose: an I/O close function
12491 * @ioctx: an I/O handler
12492 * @enc: the charset encoding if known
12493 *
12494 * Create a parser context for using the XML parser with an existing
12495 * I/O stream
12496 *
12497 * Returns the new parser context or NULL
12498 */
12499xmlParserCtxtPtr
12500xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12501 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12502 void *ioctx, xmlCharEncoding enc) {
12503 xmlParserCtxtPtr ctxt;
12504 xmlParserInputPtr inputStream;
12505 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012506
Daniel Veillard42595322004-11-08 10:52:06 +000012507 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012508
12509 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012510 if (buf == NULL) {
12511 if (ioclose != NULL)
12512 ioclose(ioctx);
12513 return (NULL);
12514 }
Owen Taylor3473f882001-02-23 17:55:21 +000012515
12516 ctxt = xmlNewParserCtxt();
12517 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012518 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012519 return(NULL);
12520 }
12521 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012522#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012523 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012524#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012525 xmlFree(ctxt->sax);
12526 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12527 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012528 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012529 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012530 return(NULL);
12531 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012532 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12533 if (sax->initialized == XML_SAX2_MAGIC)
12534 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12535 else
12536 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012537 if (user_data != NULL)
12538 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012539 }
Owen Taylor3473f882001-02-23 17:55:21 +000012540
12541 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12542 if (inputStream == NULL) {
12543 xmlFreeParserCtxt(ctxt);
12544 return(NULL);
12545 }
12546 inputPush(ctxt, inputStream);
12547
12548 return(ctxt);
12549}
12550
Daniel Veillard4432df22003-09-28 18:58:27 +000012551#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012552/************************************************************************
12553 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012554 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012555 * *
12556 ************************************************************************/
12557
12558/**
12559 * xmlIOParseDTD:
12560 * @sax: the SAX handler block or NULL
12561 * @input: an Input Buffer
12562 * @enc: the charset encoding if known
12563 *
12564 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012565 *
Owen Taylor3473f882001-02-23 17:55:21 +000012566 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012567 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012568 */
12569
12570xmlDtdPtr
12571xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12572 xmlCharEncoding enc) {
12573 xmlDtdPtr ret = NULL;
12574 xmlParserCtxtPtr ctxt;
12575 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012576 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012577
12578 if (input == NULL)
12579 return(NULL);
12580
12581 ctxt = xmlNewParserCtxt();
12582 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012583 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012584 return(NULL);
12585 }
12586
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012587 /* We are loading a DTD */
12588 ctxt->options |= XML_PARSE_DTDLOAD;
12589
Owen Taylor3473f882001-02-23 17:55:21 +000012590 /*
12591 * Set-up the SAX context
12592 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012593 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012594 if (ctxt->sax != NULL)
12595 xmlFree(ctxt->sax);
12596 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012597 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012598 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012599 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012600
12601 /*
12602 * generate a parser input from the I/O handler
12603 */
12604
Daniel Veillard43caefb2003-12-07 19:32:22 +000012605 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012606 if (pinput == NULL) {
12607 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012608 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012609 xmlFreeParserCtxt(ctxt);
12610 return(NULL);
12611 }
12612
12613 /*
12614 * plug some encoding conversion routines here.
12615 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012616 if (xmlPushInput(ctxt, pinput) < 0) {
12617 if (sax != NULL) ctxt->sax = NULL;
12618 xmlFreeParserCtxt(ctxt);
12619 return(NULL);
12620 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012621 if (enc != XML_CHAR_ENCODING_NONE) {
12622 xmlSwitchEncoding(ctxt, enc);
12623 }
Owen Taylor3473f882001-02-23 17:55:21 +000012624
12625 pinput->filename = NULL;
12626 pinput->line = 1;
12627 pinput->col = 1;
12628 pinput->base = ctxt->input->cur;
12629 pinput->cur = ctxt->input->cur;
12630 pinput->free = NULL;
12631
12632 /*
12633 * let's parse that entity knowing it's an external subset.
12634 */
12635 ctxt->inSubset = 2;
12636 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012637 if (ctxt->myDoc == NULL) {
12638 xmlErrMemory(ctxt, "New Doc failed");
12639 return(NULL);
12640 }
12641 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012642 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12643 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012644
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012645 if ((enc == XML_CHAR_ENCODING_NONE) &&
12646 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012647 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012648 * Get the 4 first bytes and decode the charset
12649 * if enc != XML_CHAR_ENCODING_NONE
12650 * plug some encoding conversion routines.
12651 */
12652 start[0] = RAW;
12653 start[1] = NXT(1);
12654 start[2] = NXT(2);
12655 start[3] = NXT(3);
12656 enc = xmlDetectCharEncoding(start, 4);
12657 if (enc != XML_CHAR_ENCODING_NONE) {
12658 xmlSwitchEncoding(ctxt, enc);
12659 }
12660 }
12661
Owen Taylor3473f882001-02-23 17:55:21 +000012662 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12663
12664 if (ctxt->myDoc != NULL) {
12665 if (ctxt->wellFormed) {
12666 ret = ctxt->myDoc->extSubset;
12667 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012668 if (ret != NULL) {
12669 xmlNodePtr tmp;
12670
12671 ret->doc = NULL;
12672 tmp = ret->children;
12673 while (tmp != NULL) {
12674 tmp->doc = NULL;
12675 tmp = tmp->next;
12676 }
12677 }
Owen Taylor3473f882001-02-23 17:55:21 +000012678 } else {
12679 ret = NULL;
12680 }
12681 xmlFreeDoc(ctxt->myDoc);
12682 ctxt->myDoc = NULL;
12683 }
12684 if (sax != NULL) ctxt->sax = NULL;
12685 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012686
Owen Taylor3473f882001-02-23 17:55:21 +000012687 return(ret);
12688}
12689
12690/**
12691 * xmlSAXParseDTD:
12692 * @sax: the SAX handler block
12693 * @ExternalID: a NAME* containing the External ID of the DTD
12694 * @SystemID: a NAME* containing the URL to the DTD
12695 *
12696 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012697 *
Owen Taylor3473f882001-02-23 17:55:21 +000012698 * Returns the resulting xmlDtdPtr or NULL in case of error.
12699 */
12700
12701xmlDtdPtr
12702xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12703 const xmlChar *SystemID) {
12704 xmlDtdPtr ret = NULL;
12705 xmlParserCtxtPtr ctxt;
12706 xmlParserInputPtr input = NULL;
12707 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012708 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012709
12710 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12711
12712 ctxt = xmlNewParserCtxt();
12713 if (ctxt == NULL) {
12714 return(NULL);
12715 }
12716
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012717 /* We are loading a DTD */
12718 ctxt->options |= XML_PARSE_DTDLOAD;
12719
Owen Taylor3473f882001-02-23 17:55:21 +000012720 /*
12721 * Set-up the SAX context
12722 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012723 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012724 if (ctxt->sax != NULL)
12725 xmlFree(ctxt->sax);
12726 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012727 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012728 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012729
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012730 /*
12731 * Canonicalise the system ID
12732 */
12733 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012734 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012735 xmlFreeParserCtxt(ctxt);
12736 return(NULL);
12737 }
Owen Taylor3473f882001-02-23 17:55:21 +000012738
12739 /*
12740 * Ask the Entity resolver to load the damn thing
12741 */
12742
12743 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012744 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12745 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012746 if (input == NULL) {
12747 if (sax != NULL) ctxt->sax = NULL;
12748 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012749 if (systemIdCanonic != NULL)
12750 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012751 return(NULL);
12752 }
12753
12754 /*
12755 * plug some encoding conversion routines here.
12756 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012757 if (xmlPushInput(ctxt, input) < 0) {
12758 if (sax != NULL) ctxt->sax = NULL;
12759 xmlFreeParserCtxt(ctxt);
12760 if (systemIdCanonic != NULL)
12761 xmlFree(systemIdCanonic);
12762 return(NULL);
12763 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012764 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12765 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12766 xmlSwitchEncoding(ctxt, enc);
12767 }
Owen Taylor3473f882001-02-23 17:55:21 +000012768
12769 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012770 input->filename = (char *) systemIdCanonic;
12771 else
12772 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012773 input->line = 1;
12774 input->col = 1;
12775 input->base = ctxt->input->cur;
12776 input->cur = ctxt->input->cur;
12777 input->free = NULL;
12778
12779 /*
12780 * let's parse that entity knowing it's an external subset.
12781 */
12782 ctxt->inSubset = 2;
12783 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012784 if (ctxt->myDoc == NULL) {
12785 xmlErrMemory(ctxt, "New Doc failed");
12786 if (sax != NULL) ctxt->sax = NULL;
12787 xmlFreeParserCtxt(ctxt);
12788 return(NULL);
12789 }
12790 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012791 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12792 ExternalID, SystemID);
12793 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12794
12795 if (ctxt->myDoc != NULL) {
12796 if (ctxt->wellFormed) {
12797 ret = ctxt->myDoc->extSubset;
12798 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012799 if (ret != NULL) {
12800 xmlNodePtr tmp;
12801
12802 ret->doc = NULL;
12803 tmp = ret->children;
12804 while (tmp != NULL) {
12805 tmp->doc = NULL;
12806 tmp = tmp->next;
12807 }
12808 }
Owen Taylor3473f882001-02-23 17:55:21 +000012809 } else {
12810 ret = NULL;
12811 }
12812 xmlFreeDoc(ctxt->myDoc);
12813 ctxt->myDoc = NULL;
12814 }
12815 if (sax != NULL) ctxt->sax = NULL;
12816 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012817
Owen Taylor3473f882001-02-23 17:55:21 +000012818 return(ret);
12819}
12820
Daniel Veillard4432df22003-09-28 18:58:27 +000012821
Owen Taylor3473f882001-02-23 17:55:21 +000012822/**
12823 * xmlParseDTD:
12824 * @ExternalID: a NAME* containing the External ID of the DTD
12825 * @SystemID: a NAME* containing the URL to the DTD
12826 *
12827 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012828 *
Owen Taylor3473f882001-02-23 17:55:21 +000012829 * Returns the resulting xmlDtdPtr or NULL in case of error.
12830 */
12831
12832xmlDtdPtr
12833xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12834 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12835}
Daniel Veillard4432df22003-09-28 18:58:27 +000012836#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012837
12838/************************************************************************
12839 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012840 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012841 * *
12842 ************************************************************************/
12843
12844/**
Owen Taylor3473f882001-02-23 17:55:21 +000012845 * xmlParseCtxtExternalEntity:
12846 * @ctx: the existing parsing context
12847 * @URL: the URL for the entity to load
12848 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012849 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012850 *
12851 * Parse an external general entity within an existing parsing context
12852 * An external general parsed entity is well-formed if it matches the
12853 * production labeled extParsedEnt.
12854 *
12855 * [78] extParsedEnt ::= TextDecl? content
12856 *
12857 * Returns 0 if the entity is well formed, -1 in case of args problem and
12858 * the parser error code otherwise
12859 */
12860
12861int
12862xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012863 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012864 xmlParserCtxtPtr ctxt;
12865 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012866 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012867 xmlSAXHandlerPtr oldsax = NULL;
12868 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012869 xmlChar start[4];
12870 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012871
Daniel Veillardce682bc2004-11-05 17:22:25 +000012872 if (ctx == NULL) return(-1);
12873
Daniel Veillard0161e632008-08-28 15:36:32 +000012874 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12875 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012876 return(XML_ERR_ENTITY_LOOP);
12877 }
12878
Daniel Veillardcda96922001-08-21 10:56:31 +000012879 if (lst != NULL)
12880 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012881 if ((URL == NULL) && (ID == NULL))
12882 return(-1);
12883 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12884 return(-1);
12885
Rob Richards798743a2009-06-19 13:54:25 -040012886 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012887 if (ctxt == NULL) {
12888 return(-1);
12889 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012890
Owen Taylor3473f882001-02-23 17:55:21 +000012891 oldsax = ctxt->sax;
12892 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012893 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012894 newDoc = xmlNewDoc(BAD_CAST "1.0");
12895 if (newDoc == NULL) {
12896 xmlFreeParserCtxt(ctxt);
12897 return(-1);
12898 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012899 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012900 if (ctx->myDoc->dict) {
12901 newDoc->dict = ctx->myDoc->dict;
12902 xmlDictReference(newDoc->dict);
12903 }
Owen Taylor3473f882001-02-23 17:55:21 +000012904 if (ctx->myDoc != NULL) {
12905 newDoc->intSubset = ctx->myDoc->intSubset;
12906 newDoc->extSubset = ctx->myDoc->extSubset;
12907 }
12908 if (ctx->myDoc->URL != NULL) {
12909 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12910 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012911 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12912 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012913 ctxt->sax = oldsax;
12914 xmlFreeParserCtxt(ctxt);
12915 newDoc->intSubset = NULL;
12916 newDoc->extSubset = NULL;
12917 xmlFreeDoc(newDoc);
12918 return(-1);
12919 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012920 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012921 nodePush(ctxt, newDoc->children);
12922 if (ctx->myDoc == NULL) {
12923 ctxt->myDoc = newDoc;
12924 } else {
12925 ctxt->myDoc = ctx->myDoc;
12926 newDoc->children->doc = ctx->myDoc;
12927 }
12928
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012929 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012930 * Get the 4 first bytes and decode the charset
12931 * if enc != XML_CHAR_ENCODING_NONE
12932 * plug some encoding conversion routines.
12933 */
12934 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012935 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12936 start[0] = RAW;
12937 start[1] = NXT(1);
12938 start[2] = NXT(2);
12939 start[3] = NXT(3);
12940 enc = xmlDetectCharEncoding(start, 4);
12941 if (enc != XML_CHAR_ENCODING_NONE) {
12942 xmlSwitchEncoding(ctxt, enc);
12943 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012944 }
12945
Owen Taylor3473f882001-02-23 17:55:21 +000012946 /*
12947 * Parse a possible text declaration first
12948 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012949 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012950 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012951 /*
12952 * An XML-1.0 document can't reference an entity not XML-1.0
12953 */
12954 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12955 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012956 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012957 "Version mismatch between document and entity\n");
12958 }
Owen Taylor3473f882001-02-23 17:55:21 +000012959 }
12960
12961 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012962 * If the user provided its own SAX callbacks then reuse the
12963 * useData callback field, otherwise the expected setup in a
12964 * DOM builder is to have userData == ctxt
12965 */
12966 if (ctx->userData == ctx)
12967 ctxt->userData = ctxt;
12968 else
12969 ctxt->userData = ctx->userData;
12970
12971 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012972 * Doing validity checking on chunk doesn't make sense
12973 */
12974 ctxt->instate = XML_PARSER_CONTENT;
12975 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012976 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012977 ctxt->loadsubset = ctx->loadsubset;
12978 ctxt->depth = ctx->depth + 1;
12979 ctxt->replaceEntities = ctx->replaceEntities;
12980 if (ctxt->validate) {
12981 ctxt->vctxt.error = ctx->vctxt.error;
12982 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012983 } else {
12984 ctxt->vctxt.error = NULL;
12985 ctxt->vctxt.warning = NULL;
12986 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012987 ctxt->vctxt.nodeTab = NULL;
12988 ctxt->vctxt.nodeNr = 0;
12989 ctxt->vctxt.nodeMax = 0;
12990 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012991 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12992 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012993 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12994 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12995 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012996 ctxt->dictNames = ctx->dictNames;
12997 ctxt->attsDefault = ctx->attsDefault;
12998 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012999 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013000
13001 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013002
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013003 ctx->validate = ctxt->validate;
13004 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013005 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013006 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013007 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013008 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013009 }
13010 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013011 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013012 }
13013
13014 if (!ctxt->wellFormed) {
13015 if (ctxt->errNo == 0)
13016 ret = 1;
13017 else
13018 ret = ctxt->errNo;
13019 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013020 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013021 xmlNodePtr cur;
13022
13023 /*
13024 * Return the newly created nodeset after unlinking it from
13025 * they pseudo parent.
13026 */
13027 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013028 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013029 while (cur != NULL) {
13030 cur->parent = NULL;
13031 cur = cur->next;
13032 }
13033 newDoc->children->children = NULL;
13034 }
13035 ret = 0;
13036 }
13037 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013038 ctxt->dict = NULL;
13039 ctxt->attsDefault = NULL;
13040 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013041 xmlFreeParserCtxt(ctxt);
13042 newDoc->intSubset = NULL;
13043 newDoc->extSubset = NULL;
13044 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013045
Owen Taylor3473f882001-02-23 17:55:21 +000013046 return(ret);
13047}
13048
13049/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013050 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013051 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013052 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013053 * @sax: the SAX handler bloc (possibly NULL)
13054 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13055 * @depth: Used for loop detection, use 0
13056 * @URL: the URL for the entity to load
13057 * @ID: the System ID for the entity to load
13058 * @list: the return value for the set of parsed nodes
13059 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013060 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013061 *
13062 * Returns 0 if the entity is well formed, -1 in case of args problem and
13063 * the parser error code otherwise
13064 */
13065
Daniel Veillard7d515752003-09-26 19:12:37 +000013066static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013067xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13068 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013069 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013070 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013071 xmlParserCtxtPtr ctxt;
13072 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013073 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013074 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013075 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013076 xmlChar start[4];
13077 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013078
Daniel Veillard0161e632008-08-28 15:36:32 +000013079 if (((depth > 40) &&
13080 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13081 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013082 return(XML_ERR_ENTITY_LOOP);
13083 }
13084
Owen Taylor3473f882001-02-23 17:55:21 +000013085 if (list != NULL)
13086 *list = NULL;
13087 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013088 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013089 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013090 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013091
13092
Rob Richards9c0aa472009-03-26 18:10:19 +000013093 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013094 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013095 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013096 if (oldctxt != NULL) {
13097 ctxt->_private = oldctxt->_private;
13098 ctxt->loadsubset = oldctxt->loadsubset;
13099 ctxt->validate = oldctxt->validate;
13100 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013101 ctxt->record_info = oldctxt->record_info;
13102 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13103 ctxt->node_seq.length = oldctxt->node_seq.length;
13104 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013105 } else {
13106 /*
13107 * Doing validity checking on chunk without context
13108 * doesn't make sense
13109 */
13110 ctxt->_private = NULL;
13111 ctxt->validate = 0;
13112 ctxt->external = 2;
13113 ctxt->loadsubset = 0;
13114 }
Owen Taylor3473f882001-02-23 17:55:21 +000013115 if (sax != NULL) {
13116 oldsax = ctxt->sax;
13117 ctxt->sax = sax;
13118 if (user_data != NULL)
13119 ctxt->userData = user_data;
13120 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013121 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013122 newDoc = xmlNewDoc(BAD_CAST "1.0");
13123 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013124 ctxt->node_seq.maximum = 0;
13125 ctxt->node_seq.length = 0;
13126 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013127 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013128 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013129 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013130 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013131 newDoc->intSubset = doc->intSubset;
13132 newDoc->extSubset = doc->extSubset;
13133 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013134 xmlDictReference(newDoc->dict);
13135
Owen Taylor3473f882001-02-23 17:55:21 +000013136 if (doc->URL != NULL) {
13137 newDoc->URL = xmlStrdup(doc->URL);
13138 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013139 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13140 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013141 if (sax != NULL)
13142 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013143 ctxt->node_seq.maximum = 0;
13144 ctxt->node_seq.length = 0;
13145 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013146 xmlFreeParserCtxt(ctxt);
13147 newDoc->intSubset = NULL;
13148 newDoc->extSubset = NULL;
13149 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013150 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013151 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013152 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013153 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013154 ctxt->myDoc = doc;
13155 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013156
Daniel Veillard0161e632008-08-28 15:36:32 +000013157 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013158 * Get the 4 first bytes and decode the charset
13159 * if enc != XML_CHAR_ENCODING_NONE
13160 * plug some encoding conversion routines.
13161 */
13162 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013163 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13164 start[0] = RAW;
13165 start[1] = NXT(1);
13166 start[2] = NXT(2);
13167 start[3] = NXT(3);
13168 enc = xmlDetectCharEncoding(start, 4);
13169 if (enc != XML_CHAR_ENCODING_NONE) {
13170 xmlSwitchEncoding(ctxt, enc);
13171 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013172 }
13173
Owen Taylor3473f882001-02-23 17:55:21 +000013174 /*
13175 * Parse a possible text declaration first
13176 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013177 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013178 xmlParseTextDecl(ctxt);
13179 }
13180
Owen Taylor3473f882001-02-23 17:55:21 +000013181 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013182 ctxt->depth = depth;
13183
13184 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013185
Daniel Veillard561b7f82002-03-20 21:55:57 +000013186 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013187 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013188 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013189 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013190 }
13191 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013192 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013193 }
13194
13195 if (!ctxt->wellFormed) {
13196 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013197 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013198 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013199 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013200 } else {
13201 if (list != NULL) {
13202 xmlNodePtr cur;
13203
13204 /*
13205 * Return the newly created nodeset after unlinking it from
13206 * they pseudo parent.
13207 */
13208 cur = newDoc->children->children;
13209 *list = cur;
13210 while (cur != NULL) {
13211 cur->parent = NULL;
13212 cur = cur->next;
13213 }
13214 newDoc->children->children = NULL;
13215 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013216 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013217 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013218
13219 /*
13220 * Record in the parent context the number of entities replacement
13221 * done when parsing that reference.
13222 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013223 if (oldctxt != NULL)
13224 oldctxt->nbentities += ctxt->nbentities;
13225
Daniel Veillard0161e632008-08-28 15:36:32 +000013226 /*
13227 * Also record the size of the entity parsed
13228 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013229 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013230 oldctxt->sizeentities += ctxt->input->consumed;
13231 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13232 }
13233 /*
13234 * And record the last error if any
13235 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013236 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013237 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13238
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013239 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013240 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013241 if (oldctxt != NULL) {
13242 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13243 oldctxt->node_seq.length = ctxt->node_seq.length;
13244 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13245 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013246 ctxt->node_seq.maximum = 0;
13247 ctxt->node_seq.length = 0;
13248 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013249 xmlFreeParserCtxt(ctxt);
13250 newDoc->intSubset = NULL;
13251 newDoc->extSubset = NULL;
13252 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013253
Owen Taylor3473f882001-02-23 17:55:21 +000013254 return(ret);
13255}
13256
Daniel Veillard81273902003-09-30 00:43:48 +000013257#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013258/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013259 * xmlParseExternalEntity:
13260 * @doc: the document the chunk pertains to
13261 * @sax: the SAX handler bloc (possibly NULL)
13262 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13263 * @depth: Used for loop detection, use 0
13264 * @URL: the URL for the entity to load
13265 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013266 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013267 *
13268 * Parse an external general entity
13269 * An external general parsed entity is well-formed if it matches the
13270 * production labeled extParsedEnt.
13271 *
13272 * [78] extParsedEnt ::= TextDecl? content
13273 *
13274 * Returns 0 if the entity is well formed, -1 in case of args problem and
13275 * the parser error code otherwise
13276 */
13277
13278int
13279xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013280 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013281 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013282 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013283}
13284
13285/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013286 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013287 * @doc: the document the chunk pertains to
13288 * @sax: the SAX handler bloc (possibly NULL)
13289 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13290 * @depth: Used for loop detection, use 0
13291 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013292 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013293 *
13294 * Parse a well-balanced chunk of an XML document
13295 * called by the parser
13296 * The allowed sequence for the Well Balanced Chunk is the one defined by
13297 * the content production in the XML grammar:
13298 *
13299 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13300 *
13301 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13302 * the parser error code otherwise
13303 */
13304
13305int
13306xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013307 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013308 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13309 depth, string, lst, 0 );
13310}
Daniel Veillard81273902003-09-30 00:43:48 +000013311#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013312
13313/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013314 * xmlParseBalancedChunkMemoryInternal:
13315 * @oldctxt: the existing parsing context
13316 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13317 * @user_data: the user data field for the parser context
13318 * @lst: the return value for the set of parsed nodes
13319 *
13320 *
13321 * Parse a well-balanced chunk of an XML document
13322 * called by the parser
13323 * The allowed sequence for the Well Balanced Chunk is the one defined by
13324 * the content production in the XML grammar:
13325 *
13326 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13327 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013328 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13329 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013330 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013331 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013332 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013333 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013334static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013335xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13336 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13337 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013338 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013339 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013340 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013341 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013342 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013343 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013344 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013345#ifdef SAX2
13346 int i;
13347#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013348
Daniel Veillard0161e632008-08-28 15:36:32 +000013349 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13350 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013351 return(XML_ERR_ENTITY_LOOP);
13352 }
13353
13354
13355 if (lst != NULL)
13356 *lst = NULL;
13357 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013358 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013359
13360 size = xmlStrlen(string);
13361
13362 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013363 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013364 if (user_data != NULL)
13365 ctxt->userData = user_data;
13366 else
13367 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013368 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13369 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013370 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13371 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13372 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013373
Daniel Veillard74eaec12009-08-26 15:57:20 +020013374#ifdef SAX2
13375 /* propagate namespaces down the entity */
13376 for (i = 0;i < oldctxt->nsNr;i += 2) {
13377 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13378 }
13379#endif
13380
Daniel Veillard328f48c2002-11-15 15:24:34 +000013381 oldsax = ctxt->sax;
13382 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013383 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013384 ctxt->replaceEntities = oldctxt->replaceEntities;
13385 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013386
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013387 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013388 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013389 newDoc = xmlNewDoc(BAD_CAST "1.0");
13390 if (newDoc == NULL) {
13391 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013392 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013393 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013394 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013395 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013396 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013397 newDoc->dict = ctxt->dict;
13398 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013399 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013400 } else {
13401 ctxt->myDoc = oldctxt->myDoc;
13402 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013403 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013404 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013405 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13406 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013407 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013408 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013409 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013410 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013411 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013412 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013413 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013414 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013415 ctxt->myDoc->children = NULL;
13416 ctxt->myDoc->last = NULL;
13417 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013418 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013419 ctxt->instate = XML_PARSER_CONTENT;
13420 ctxt->depth = oldctxt->depth + 1;
13421
Daniel Veillard328f48c2002-11-15 15:24:34 +000013422 ctxt->validate = 0;
13423 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013424 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13425 /*
13426 * ID/IDREF registration will be done in xmlValidateElement below
13427 */
13428 ctxt->loadsubset |= XML_SKIP_IDS;
13429 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013430 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013431 ctxt->attsDefault = oldctxt->attsDefault;
13432 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013433
Daniel Veillard68e9e742002-11-16 15:35:11 +000013434 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013435 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013437 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013438 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013439 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013440 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013442 }
13443
13444 if (!ctxt->wellFormed) {
13445 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013446 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013447 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013448 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013449 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013450 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013451 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013452
William M. Brack7b9154b2003-09-27 19:23:50 +000013453 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013454 xmlNodePtr cur;
13455
13456 /*
13457 * Return the newly created nodeset after unlinking it from
13458 * they pseudo parent.
13459 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013460 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013461 *lst = cur;
13462 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013463#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013464 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13465 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13466 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013467 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13468 oldctxt->myDoc, cur);
13469 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013470#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013471 cur->parent = NULL;
13472 cur = cur->next;
13473 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013474 ctxt->myDoc->children->children = NULL;
13475 }
13476 if (ctxt->myDoc != NULL) {
13477 xmlFreeNode(ctxt->myDoc->children);
13478 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013479 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013480 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013481
13482 /*
13483 * Record in the parent context the number of entities replacement
13484 * done when parsing that reference.
13485 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013486 if (oldctxt != NULL)
13487 oldctxt->nbentities += ctxt->nbentities;
13488
Daniel Veillard0161e632008-08-28 15:36:32 +000013489 /*
13490 * Also record the last error if any
13491 */
13492 if (ctxt->lastError.code != XML_ERR_OK)
13493 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13494
Daniel Veillard328f48c2002-11-15 15:24:34 +000013495 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013496 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013497 ctxt->attsDefault = NULL;
13498 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013499 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013500 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013501 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013502 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013503
Daniel Veillard328f48c2002-11-15 15:24:34 +000013504 return(ret);
13505}
13506
Daniel Veillard29b17482004-08-16 00:39:03 +000013507/**
13508 * xmlParseInNodeContext:
13509 * @node: the context node
13510 * @data: the input string
13511 * @datalen: the input string length in bytes
13512 * @options: a combination of xmlParserOption
13513 * @lst: the return value for the set of parsed nodes
13514 *
13515 * Parse a well-balanced chunk of an XML document
13516 * within the context (DTD, namespaces, etc ...) of the given node.
13517 *
13518 * The allowed sequence for the data is a Well Balanced Chunk defined by
13519 * the content production in the XML grammar:
13520 *
13521 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13522 *
13523 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13524 * error code otherwise
13525 */
13526xmlParserErrors
13527xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13528 int options, xmlNodePtr *lst) {
13529#ifdef SAX2
13530 xmlParserCtxtPtr ctxt;
13531 xmlDocPtr doc = NULL;
13532 xmlNodePtr fake, cur;
13533 int nsnr = 0;
13534
13535 xmlParserErrors ret = XML_ERR_OK;
13536
13537 /*
13538 * check all input parameters, grab the document
13539 */
13540 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13541 return(XML_ERR_INTERNAL_ERROR);
13542 switch (node->type) {
13543 case XML_ELEMENT_NODE:
13544 case XML_ATTRIBUTE_NODE:
13545 case XML_TEXT_NODE:
13546 case XML_CDATA_SECTION_NODE:
13547 case XML_ENTITY_REF_NODE:
13548 case XML_PI_NODE:
13549 case XML_COMMENT_NODE:
13550 case XML_DOCUMENT_NODE:
13551 case XML_HTML_DOCUMENT_NODE:
13552 break;
13553 default:
13554 return(XML_ERR_INTERNAL_ERROR);
13555
13556 }
13557 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13558 (node->type != XML_DOCUMENT_NODE) &&
13559 (node->type != XML_HTML_DOCUMENT_NODE))
13560 node = node->parent;
13561 if (node == NULL)
13562 return(XML_ERR_INTERNAL_ERROR);
13563 if (node->type == XML_ELEMENT_NODE)
13564 doc = node->doc;
13565 else
13566 doc = (xmlDocPtr) node;
13567 if (doc == NULL)
13568 return(XML_ERR_INTERNAL_ERROR);
13569
13570 /*
13571 * allocate a context and set-up everything not related to the
13572 * node position in the tree
13573 */
13574 if (doc->type == XML_DOCUMENT_NODE)
13575 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13576#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013577 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013578 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013579 /*
13580 * When parsing in context, it makes no sense to add implied
13581 * elements like html/body/etc...
13582 */
13583 options |= HTML_PARSE_NOIMPLIED;
13584 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013585#endif
13586 else
13587 return(XML_ERR_INTERNAL_ERROR);
13588
13589 if (ctxt == NULL)
13590 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013591
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013592 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013593 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13594 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13595 * we must wait until the last moment to free the original one.
13596 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013597 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013598 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013599 xmlDictFree(ctxt->dict);
13600 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013601 } else
13602 options |= XML_PARSE_NODICT;
13603
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013604 if (doc->encoding != NULL) {
13605 xmlCharEncodingHandlerPtr hdlr;
13606
13607 if (ctxt->encoding != NULL)
13608 xmlFree((xmlChar *) ctxt->encoding);
13609 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13610
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013611 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013612 if (hdlr != NULL) {
13613 xmlSwitchToEncoding(ctxt, hdlr);
13614 } else {
13615 return(XML_ERR_UNSUPPORTED_ENCODING);
13616 }
13617 }
13618
Daniel Veillard37334572008-07-31 08:20:02 +000013619 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013620 xmlDetectSAX2(ctxt);
13621 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013622 /* parsing in context, i.e. as within existing content */
13623 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013624
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013625 fake = xmlNewComment(NULL);
13626 if (fake == NULL) {
13627 xmlFreeParserCtxt(ctxt);
13628 return(XML_ERR_NO_MEMORY);
13629 }
13630 xmlAddChild(node, fake);
13631
Daniel Veillard29b17482004-08-16 00:39:03 +000013632 if (node->type == XML_ELEMENT_NODE) {
13633 nodePush(ctxt, node);
13634 /*
13635 * initialize the SAX2 namespaces stack
13636 */
13637 cur = node;
13638 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13639 xmlNsPtr ns = cur->nsDef;
13640 const xmlChar *iprefix, *ihref;
13641
13642 while (ns != NULL) {
13643 if (ctxt->dict) {
13644 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13645 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13646 } else {
13647 iprefix = ns->prefix;
13648 ihref = ns->href;
13649 }
13650
13651 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13652 nsPush(ctxt, iprefix, ihref);
13653 nsnr++;
13654 }
13655 ns = ns->next;
13656 }
13657 cur = cur->parent;
13658 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013659 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013660
13661 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13662 /*
13663 * ID/IDREF registration will be done in xmlValidateElement below
13664 */
13665 ctxt->loadsubset |= XML_SKIP_IDS;
13666 }
13667
Daniel Veillard499cc922006-01-18 17:22:35 +000013668#ifdef LIBXML_HTML_ENABLED
13669 if (doc->type == XML_HTML_DOCUMENT_NODE)
13670 __htmlParseContent(ctxt);
13671 else
13672#endif
13673 xmlParseContent(ctxt);
13674
Daniel Veillard29b17482004-08-16 00:39:03 +000013675 nsPop(ctxt, nsnr);
13676 if ((RAW == '<') && (NXT(1) == '/')) {
13677 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13678 } else if (RAW != 0) {
13679 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13680 }
13681 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13682 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13683 ctxt->wellFormed = 0;
13684 }
13685
13686 if (!ctxt->wellFormed) {
13687 if (ctxt->errNo == 0)
13688 ret = XML_ERR_INTERNAL_ERROR;
13689 else
13690 ret = (xmlParserErrors)ctxt->errNo;
13691 } else {
13692 ret = XML_ERR_OK;
13693 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013694
Daniel Veillard29b17482004-08-16 00:39:03 +000013695 /*
13696 * Return the newly created nodeset after unlinking it from
13697 * the pseudo sibling.
13698 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013699
Daniel Veillard29b17482004-08-16 00:39:03 +000013700 cur = fake->next;
13701 fake->next = NULL;
13702 node->last = fake;
13703
13704 if (cur != NULL) {
13705 cur->prev = NULL;
13706 }
13707
13708 *lst = cur;
13709
13710 while (cur != NULL) {
13711 cur->parent = NULL;
13712 cur = cur->next;
13713 }
13714
13715 xmlUnlinkNode(fake);
13716 xmlFreeNode(fake);
13717
13718
13719 if (ret != XML_ERR_OK) {
13720 xmlFreeNodeList(*lst);
13721 *lst = NULL;
13722 }
William M. Brackc3f81342004-10-03 01:22:44 +000013723
William M. Brackb7b54de2004-10-06 16:38:01 +000013724 if (doc->dict != NULL)
13725 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013726 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013727
Daniel Veillard29b17482004-08-16 00:39:03 +000013728 return(ret);
13729#else /* !SAX2 */
13730 return(XML_ERR_INTERNAL_ERROR);
13731#endif
13732}
13733
Daniel Veillard81273902003-09-30 00:43:48 +000013734#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013735/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013736 * xmlParseBalancedChunkMemoryRecover:
13737 * @doc: the document the chunk pertains to
13738 * @sax: the SAX handler bloc (possibly NULL)
13739 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13740 * @depth: Used for loop detection, use 0
13741 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13742 * @lst: the return value for the set of parsed nodes
13743 * @recover: return nodes even if the data is broken (use 0)
13744 *
13745 *
13746 * Parse a well-balanced chunk of an XML document
13747 * called by the parser
13748 * The allowed sequence for the Well Balanced Chunk is the one defined by
13749 * the content production in the XML grammar:
13750 *
13751 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13752 *
13753 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13754 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013755 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013756 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013757 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13758 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013759 */
13760int
13761xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013762 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013763 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013764 xmlParserCtxtPtr ctxt;
13765 xmlDocPtr newDoc;
13766 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013767 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013768 int size;
13769 int ret = 0;
13770
Daniel Veillard0161e632008-08-28 15:36:32 +000013771 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013772 return(XML_ERR_ENTITY_LOOP);
13773 }
13774
13775
Daniel Veillardcda96922001-08-21 10:56:31 +000013776 if (lst != NULL)
13777 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013778 if (string == NULL)
13779 return(-1);
13780
13781 size = xmlStrlen(string);
13782
13783 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13784 if (ctxt == NULL) return(-1);
13785 ctxt->userData = ctxt;
13786 if (sax != NULL) {
13787 oldsax = ctxt->sax;
13788 ctxt->sax = sax;
13789 if (user_data != NULL)
13790 ctxt->userData = user_data;
13791 }
13792 newDoc = xmlNewDoc(BAD_CAST "1.0");
13793 if (newDoc == NULL) {
13794 xmlFreeParserCtxt(ctxt);
13795 return(-1);
13796 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013797 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013798 if ((doc != NULL) && (doc->dict != NULL)) {
13799 xmlDictFree(ctxt->dict);
13800 ctxt->dict = doc->dict;
13801 xmlDictReference(ctxt->dict);
13802 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13803 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13804 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13805 ctxt->dictNames = 1;
13806 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013807 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013808 }
Owen Taylor3473f882001-02-23 17:55:21 +000013809 if (doc != NULL) {
13810 newDoc->intSubset = doc->intSubset;
13811 newDoc->extSubset = doc->extSubset;
13812 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013813 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13814 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013815 if (sax != NULL)
13816 ctxt->sax = oldsax;
13817 xmlFreeParserCtxt(ctxt);
13818 newDoc->intSubset = NULL;
13819 newDoc->extSubset = NULL;
13820 xmlFreeDoc(newDoc);
13821 return(-1);
13822 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013823 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13824 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013825 if (doc == NULL) {
13826 ctxt->myDoc = newDoc;
13827 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013828 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013829 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013830 /* Ensure that doc has XML spec namespace */
13831 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13832 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013833 }
13834 ctxt->instate = XML_PARSER_CONTENT;
13835 ctxt->depth = depth;
13836
13837 /*
13838 * Doing validity checking on chunk doesn't make sense
13839 */
13840 ctxt->validate = 0;
13841 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013842 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013843
Daniel Veillardb39bc392002-10-26 19:29:51 +000013844 if ( doc != NULL ){
13845 content = doc->children;
13846 doc->children = NULL;
13847 xmlParseContent(ctxt);
13848 doc->children = content;
13849 }
13850 else {
13851 xmlParseContent(ctxt);
13852 }
Owen Taylor3473f882001-02-23 17:55:21 +000013853 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013854 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013855 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013856 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013857 }
13858 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013859 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013860 }
13861
13862 if (!ctxt->wellFormed) {
13863 if (ctxt->errNo == 0)
13864 ret = 1;
13865 else
13866 ret = ctxt->errNo;
13867 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013868 ret = 0;
13869 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013870
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013871 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13872 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013873
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013874 /*
13875 * Return the newly created nodeset after unlinking it from
13876 * they pseudo parent.
13877 */
13878 cur = newDoc->children->children;
13879 *lst = cur;
13880 while (cur != NULL) {
13881 xmlSetTreeDoc(cur, doc);
13882 cur->parent = NULL;
13883 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013884 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013885 newDoc->children->children = NULL;
13886 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013887
13888 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013889 ctxt->sax = oldsax;
13890 xmlFreeParserCtxt(ctxt);
13891 newDoc->intSubset = NULL;
13892 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013893 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013894 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013895
Owen Taylor3473f882001-02-23 17:55:21 +000013896 return(ret);
13897}
13898
13899/**
13900 * xmlSAXParseEntity:
13901 * @sax: the SAX handler block
13902 * @filename: the filename
13903 *
13904 * parse an XML external entity out of context and build a tree.
13905 * It use the given SAX function block to handle the parsing callback.
13906 * If sax is NULL, fallback to the default DOM tree building routines.
13907 *
13908 * [78] extParsedEnt ::= TextDecl? content
13909 *
13910 * This correspond to a "Well Balanced" chunk
13911 *
13912 * Returns the resulting document tree
13913 */
13914
13915xmlDocPtr
13916xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13917 xmlDocPtr ret;
13918 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013919
13920 ctxt = xmlCreateFileParserCtxt(filename);
13921 if (ctxt == NULL) {
13922 return(NULL);
13923 }
13924 if (sax != NULL) {
13925 if (ctxt->sax != NULL)
13926 xmlFree(ctxt->sax);
13927 ctxt->sax = sax;
13928 ctxt->userData = NULL;
13929 }
13930
Owen Taylor3473f882001-02-23 17:55:21 +000013931 xmlParseExtParsedEnt(ctxt);
13932
13933 if (ctxt->wellFormed)
13934 ret = ctxt->myDoc;
13935 else {
13936 ret = NULL;
13937 xmlFreeDoc(ctxt->myDoc);
13938 ctxt->myDoc = NULL;
13939 }
13940 if (sax != NULL)
13941 ctxt->sax = NULL;
13942 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013943
Owen Taylor3473f882001-02-23 17:55:21 +000013944 return(ret);
13945}
13946
13947/**
13948 * xmlParseEntity:
13949 * @filename: the filename
13950 *
13951 * parse an XML external entity out of context and build a tree.
13952 *
13953 * [78] extParsedEnt ::= TextDecl? content
13954 *
13955 * This correspond to a "Well Balanced" chunk
13956 *
13957 * Returns the resulting document tree
13958 */
13959
13960xmlDocPtr
13961xmlParseEntity(const char *filename) {
13962 return(xmlSAXParseEntity(NULL, filename));
13963}
Daniel Veillard81273902003-09-30 00:43:48 +000013964#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013965
13966/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013967 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013968 * @URL: the entity URL
13969 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013970 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013971 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013972 *
13973 * Create a parser context for an external entity
13974 * Automatic support for ZLIB/Compress compressed document is provided
13975 * by default if found at compile-time.
13976 *
13977 * Returns the new parser context or NULL
13978 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013979static xmlParserCtxtPtr
13980xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13981 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013982 xmlParserCtxtPtr ctxt;
13983 xmlParserInputPtr inputStream;
13984 char *directory = NULL;
13985 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013986
Owen Taylor3473f882001-02-23 17:55:21 +000013987 ctxt = xmlNewParserCtxt();
13988 if (ctxt == NULL) {
13989 return(NULL);
13990 }
13991
Daniel Veillard48247b42009-07-10 16:12:46 +020013992 if (pctx != NULL) {
13993 ctxt->options = pctx->options;
13994 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013995 }
13996
Owen Taylor3473f882001-02-23 17:55:21 +000013997 uri = xmlBuildURI(URL, base);
13998
13999 if (uri == NULL) {
14000 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14001 if (inputStream == NULL) {
14002 xmlFreeParserCtxt(ctxt);
14003 return(NULL);
14004 }
14005
14006 inputPush(ctxt, inputStream);
14007
14008 if ((ctxt->directory == NULL) && (directory == NULL))
14009 directory = xmlParserGetDirectory((char *)URL);
14010 if ((ctxt->directory == NULL) && (directory != NULL))
14011 ctxt->directory = directory;
14012 } else {
14013 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14014 if (inputStream == NULL) {
14015 xmlFree(uri);
14016 xmlFreeParserCtxt(ctxt);
14017 return(NULL);
14018 }
14019
14020 inputPush(ctxt, inputStream);
14021
14022 if ((ctxt->directory == NULL) && (directory == NULL))
14023 directory = xmlParserGetDirectory((char *)uri);
14024 if ((ctxt->directory == NULL) && (directory != NULL))
14025 ctxt->directory = directory;
14026 xmlFree(uri);
14027 }
Owen Taylor3473f882001-02-23 17:55:21 +000014028 return(ctxt);
14029}
14030
Rob Richards9c0aa472009-03-26 18:10:19 +000014031/**
14032 * xmlCreateEntityParserCtxt:
14033 * @URL: the entity URL
14034 * @ID: the entity PUBLIC ID
14035 * @base: a possible base for the target URI
14036 *
14037 * Create a parser context for an external entity
14038 * Automatic support for ZLIB/Compress compressed document is provided
14039 * by default if found at compile-time.
14040 *
14041 * Returns the new parser context or NULL
14042 */
14043xmlParserCtxtPtr
14044xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14045 const xmlChar *base) {
14046 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14047
14048}
14049
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
14055
14056/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014057 * xmlCreateURLParserCtxt:
14058 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014059 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014060 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014061 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014062 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014063 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014064 *
14065 * Returns the new parser context or NULL
14066 */
14067xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014068xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014069{
14070 xmlParserCtxtPtr ctxt;
14071 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014072 char *directory = NULL;
14073
Owen Taylor3473f882001-02-23 17:55:21 +000014074 ctxt = xmlNewParserCtxt();
14075 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014076 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014077 return(NULL);
14078 }
14079
Daniel Veillarddf292f72005-01-16 19:00:15 +000014080 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014081 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014082 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014083
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014084 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014085 if (inputStream == NULL) {
14086 xmlFreeParserCtxt(ctxt);
14087 return(NULL);
14088 }
14089
Owen Taylor3473f882001-02-23 17:55:21 +000014090 inputPush(ctxt, inputStream);
14091 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014092 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014093 if ((ctxt->directory == NULL) && (directory != NULL))
14094 ctxt->directory = directory;
14095
14096 return(ctxt);
14097}
14098
Daniel Veillard61b93382003-11-03 14:28:31 +000014099/**
14100 * xmlCreateFileParserCtxt:
14101 * @filename: the filename
14102 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014103 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014104 * Automatic support for ZLIB/Compress compressed document is provided
14105 * by default if found at compile-time.
14106 *
14107 * Returns the new parser context or NULL
14108 */
14109xmlParserCtxtPtr
14110xmlCreateFileParserCtxt(const char *filename)
14111{
14112 return(xmlCreateURLParserCtxt(filename, 0));
14113}
14114
Daniel Veillard81273902003-09-30 00:43:48 +000014115#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014116/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014117 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014118 * @sax: the SAX handler block
14119 * @filename: the filename
14120 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14121 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014122 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014123 *
14124 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14125 * compressed document is provided by default if found at compile-time.
14126 * It use the given SAX function block to handle the parsing callback.
14127 * If sax is NULL, fallback to the default DOM tree building routines.
14128 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014129 * User data (void *) is stored within the parser context in the
14130 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014131 *
Owen Taylor3473f882001-02-23 17:55:21 +000014132 * Returns the resulting document tree
14133 */
14134
14135xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014136xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14137 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014138 xmlDocPtr ret;
14139 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014140
Daniel Veillard635ef722001-10-29 11:48:19 +000014141 xmlInitParser();
14142
Owen Taylor3473f882001-02-23 17:55:21 +000014143 ctxt = xmlCreateFileParserCtxt(filename);
14144 if (ctxt == NULL) {
14145 return(NULL);
14146 }
14147 if (sax != NULL) {
14148 if (ctxt->sax != NULL)
14149 xmlFree(ctxt->sax);
14150 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014151 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014152 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014153 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014154 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014155 }
Owen Taylor3473f882001-02-23 17:55:21 +000014156
Daniel Veillard37d2d162008-03-14 10:54:00 +000014157 if (ctxt->directory == NULL)
14158 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014159
Daniel Veillarddad3f682002-11-17 16:47:27 +000014160 ctxt->recovery = recovery;
14161
Owen Taylor3473f882001-02-23 17:55:21 +000014162 xmlParseDocument(ctxt);
14163
William M. Brackc07329e2003-09-08 01:57:30 +000014164 if ((ctxt->wellFormed) || recovery) {
14165 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014166 if (ret != NULL) {
14167 if (ctxt->input->buf->compressed > 0)
14168 ret->compression = 9;
14169 else
14170 ret->compression = ctxt->input->buf->compressed;
14171 }
William M. Brackc07329e2003-09-08 01:57:30 +000014172 }
Owen Taylor3473f882001-02-23 17:55:21 +000014173 else {
14174 ret = NULL;
14175 xmlFreeDoc(ctxt->myDoc);
14176 ctxt->myDoc = NULL;
14177 }
14178 if (sax != NULL)
14179 ctxt->sax = NULL;
14180 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014181
Owen Taylor3473f882001-02-23 17:55:21 +000014182 return(ret);
14183}
14184
14185/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014186 * xmlSAXParseFile:
14187 * @sax: the SAX handler block
14188 * @filename: the filename
14189 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14190 * documents
14191 *
14192 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14193 * compressed document is provided by default if found at compile-time.
14194 * It use the given SAX function block to handle the parsing callback.
14195 * If sax is NULL, fallback to the default DOM tree building routines.
14196 *
14197 * Returns the resulting document tree
14198 */
14199
14200xmlDocPtr
14201xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14202 int recovery) {
14203 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14204}
14205
14206/**
Owen Taylor3473f882001-02-23 17:55:21 +000014207 * xmlRecoverDoc:
14208 * @cur: a pointer to an array of xmlChar
14209 *
14210 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014211 * In the case the document is not Well Formed, a attempt to build a
14212 * tree is tried anyway
14213 *
14214 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014215 */
14216
14217xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014218xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014219 return(xmlSAXParseDoc(NULL, cur, 1));
14220}
14221
14222/**
14223 * xmlParseFile:
14224 * @filename: the filename
14225 *
14226 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14227 * compressed document is provided by default if found at compile-time.
14228 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014229 * Returns the resulting document tree if the file was wellformed,
14230 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014231 */
14232
14233xmlDocPtr
14234xmlParseFile(const char *filename) {
14235 return(xmlSAXParseFile(NULL, filename, 0));
14236}
14237
14238/**
14239 * xmlRecoverFile:
14240 * @filename: the filename
14241 *
14242 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14243 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014244 * In the case the document is not Well Formed, it attempts to build
14245 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014246 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014247 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014248 */
14249
14250xmlDocPtr
14251xmlRecoverFile(const char *filename) {
14252 return(xmlSAXParseFile(NULL, filename, 1));
14253}
14254
14255
14256/**
14257 * xmlSetupParserForBuffer:
14258 * @ctxt: an XML parser context
14259 * @buffer: a xmlChar * buffer
14260 * @filename: a file name
14261 *
14262 * Setup the parser context to parse a new buffer; Clears any prior
14263 * contents from the parser context. The buffer parameter must not be
14264 * NULL, but the filename parameter can be
14265 */
14266void
14267xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14268 const char* filename)
14269{
14270 xmlParserInputPtr input;
14271
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014272 if ((ctxt == NULL) || (buffer == NULL))
14273 return;
14274
Owen Taylor3473f882001-02-23 17:55:21 +000014275 input = xmlNewInputStream(ctxt);
14276 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014277 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014278 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014279 return;
14280 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014281
Owen Taylor3473f882001-02-23 17:55:21 +000014282 xmlClearParserCtxt(ctxt);
14283 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014284 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014285 input->base = buffer;
14286 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014287 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014288 inputPush(ctxt, input);
14289}
14290
14291/**
14292 * xmlSAXUserParseFile:
14293 * @sax: a SAX handler
14294 * @user_data: The user data returned on SAX callbacks
14295 * @filename: a file name
14296 *
14297 * parse an XML file and call the given SAX handler routines.
14298 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014299 *
Owen Taylor3473f882001-02-23 17:55:21 +000014300 * Returns 0 in case of success or a error number otherwise
14301 */
14302int
14303xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14304 const char *filename) {
14305 int ret = 0;
14306 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014307
Owen Taylor3473f882001-02-23 17:55:21 +000014308 ctxt = xmlCreateFileParserCtxt(filename);
14309 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014310 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014311 xmlFree(ctxt->sax);
14312 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014313 xmlDetectSAX2(ctxt);
14314
Owen Taylor3473f882001-02-23 17:55:21 +000014315 if (user_data != NULL)
14316 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014317
Owen Taylor3473f882001-02-23 17:55:21 +000014318 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014319
Owen Taylor3473f882001-02-23 17:55:21 +000014320 if (ctxt->wellFormed)
14321 ret = 0;
14322 else {
14323 if (ctxt->errNo != 0)
14324 ret = ctxt->errNo;
14325 else
14326 ret = -1;
14327 }
14328 if (sax != NULL)
14329 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014330 if (ctxt->myDoc != NULL) {
14331 xmlFreeDoc(ctxt->myDoc);
14332 ctxt->myDoc = NULL;
14333 }
Owen Taylor3473f882001-02-23 17:55:21 +000014334 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014335
Owen Taylor3473f882001-02-23 17:55:21 +000014336 return ret;
14337}
Daniel Veillard81273902003-09-30 00:43:48 +000014338#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014339
14340/************************************************************************
14341 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014342 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014343 * *
14344 ************************************************************************/
14345
14346/**
14347 * xmlCreateMemoryParserCtxt:
14348 * @buffer: a pointer to a char array
14349 * @size: the size of the array
14350 *
14351 * Create a parser context for an XML in-memory document.
14352 *
14353 * Returns the new parser context or NULL
14354 */
14355xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014356xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014357 xmlParserCtxtPtr ctxt;
14358 xmlParserInputPtr input;
14359 xmlParserInputBufferPtr buf;
14360
14361 if (buffer == NULL)
14362 return(NULL);
14363 if (size <= 0)
14364 return(NULL);
14365
14366 ctxt = xmlNewParserCtxt();
14367 if (ctxt == NULL)
14368 return(NULL);
14369
Daniel Veillard53350552003-09-18 13:35:51 +000014370 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014371 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014372 if (buf == NULL) {
14373 xmlFreeParserCtxt(ctxt);
14374 return(NULL);
14375 }
Owen Taylor3473f882001-02-23 17:55:21 +000014376
14377 input = xmlNewInputStream(ctxt);
14378 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014379 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014380 xmlFreeParserCtxt(ctxt);
14381 return(NULL);
14382 }
14383
14384 input->filename = NULL;
14385 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014386 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014387
14388 inputPush(ctxt, input);
14389 return(ctxt);
14390}
14391
Daniel Veillard81273902003-09-30 00:43:48 +000014392#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014393/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014394 * xmlSAXParseMemoryWithData:
14395 * @sax: the SAX handler block
14396 * @buffer: an pointer to a char array
14397 * @size: the size of the array
14398 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14399 * documents
14400 * @data: the userdata
14401 *
14402 * parse an XML in-memory block and use the given SAX function block
14403 * to handle the parsing callback. If sax is NULL, fallback to the default
14404 * DOM tree building routines.
14405 *
14406 * User data (void *) is stored within the parser context in the
14407 * context's _private member, so it is available nearly everywhere in libxml
14408 *
14409 * Returns the resulting document tree
14410 */
14411
14412xmlDocPtr
14413xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14414 int size, int recovery, void *data) {
14415 xmlDocPtr ret;
14416 xmlParserCtxtPtr ctxt;
14417
Daniel Veillardab2a7632009-07-09 08:45:03 +020014418 xmlInitParser();
14419
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014420 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14421 if (ctxt == NULL) return(NULL);
14422 if (sax != NULL) {
14423 if (ctxt->sax != NULL)
14424 xmlFree(ctxt->sax);
14425 ctxt->sax = sax;
14426 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014427 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014428 if (data!=NULL) {
14429 ctxt->_private=data;
14430 }
14431
Daniel Veillardadba5f12003-04-04 16:09:01 +000014432 ctxt->recovery = recovery;
14433
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014434 xmlParseDocument(ctxt);
14435
14436 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14437 else {
14438 ret = NULL;
14439 xmlFreeDoc(ctxt->myDoc);
14440 ctxt->myDoc = NULL;
14441 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014442 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014443 ctxt->sax = NULL;
14444 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014445
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014446 return(ret);
14447}
14448
14449/**
Owen Taylor3473f882001-02-23 17:55:21 +000014450 * xmlSAXParseMemory:
14451 * @sax: the SAX handler block
14452 * @buffer: an pointer to a char array
14453 * @size: the size of the array
14454 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14455 * documents
14456 *
14457 * parse an XML in-memory block and use the given SAX function block
14458 * to handle the parsing callback. If sax is NULL, fallback to the default
14459 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014460 *
Owen Taylor3473f882001-02-23 17:55:21 +000014461 * Returns the resulting document tree
14462 */
14463xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014464xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14465 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014466 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014467}
14468
14469/**
14470 * xmlParseMemory:
14471 * @buffer: an pointer to a char array
14472 * @size: the size of the array
14473 *
14474 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014475 *
Owen Taylor3473f882001-02-23 17:55:21 +000014476 * Returns the resulting document tree
14477 */
14478
Daniel Veillard50822cb2001-07-26 20:05:51 +000014479xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014480 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14481}
14482
14483/**
14484 * xmlRecoverMemory:
14485 * @buffer: an pointer to a char array
14486 * @size: the size of the array
14487 *
14488 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014489 * In the case the document is not Well Formed, an attempt to
14490 * build a tree is tried anyway
14491 *
14492 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014493 */
14494
Daniel Veillard50822cb2001-07-26 20:05:51 +000014495xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014496 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14497}
14498
14499/**
14500 * xmlSAXUserParseMemory:
14501 * @sax: a SAX handler
14502 * @user_data: The user data returned on SAX callbacks
14503 * @buffer: an in-memory XML document input
14504 * @size: the length of the XML document in bytes
14505 *
14506 * A better SAX parsing routine.
14507 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014508 *
Owen Taylor3473f882001-02-23 17:55:21 +000014509 * Returns 0 in case of success or a error number otherwise
14510 */
14511int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014512 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014513 int ret = 0;
14514 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014515
14516 xmlInitParser();
14517
Owen Taylor3473f882001-02-23 17:55:21 +000014518 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14519 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014520 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14521 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014522 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014523 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014524
Daniel Veillard30211a02001-04-26 09:33:18 +000014525 if (user_data != NULL)
14526 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014527
Owen Taylor3473f882001-02-23 17:55:21 +000014528 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014529
Owen Taylor3473f882001-02-23 17:55:21 +000014530 if (ctxt->wellFormed)
14531 ret = 0;
14532 else {
14533 if (ctxt->errNo != 0)
14534 ret = ctxt->errNo;
14535 else
14536 ret = -1;
14537 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014538 if (sax != NULL)
14539 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014540 if (ctxt->myDoc != NULL) {
14541 xmlFreeDoc(ctxt->myDoc);
14542 ctxt->myDoc = NULL;
14543 }
Owen Taylor3473f882001-02-23 17:55:21 +000014544 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014545
Owen Taylor3473f882001-02-23 17:55:21 +000014546 return ret;
14547}
Daniel Veillard81273902003-09-30 00:43:48 +000014548#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014549
14550/**
14551 * xmlCreateDocParserCtxt:
14552 * @cur: a pointer to an array of xmlChar
14553 *
14554 * Creates a parser context for an XML in-memory document.
14555 *
14556 * Returns the new parser context or NULL
14557 */
14558xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014559xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014560 int len;
14561
14562 if (cur == NULL)
14563 return(NULL);
14564 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014565 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014566}
14567
Daniel Veillard81273902003-09-30 00:43:48 +000014568#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014569/**
14570 * xmlSAXParseDoc:
14571 * @sax: the SAX handler block
14572 * @cur: a pointer to an array of xmlChar
14573 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14574 * documents
14575 *
14576 * parse an XML in-memory document and build a tree.
14577 * It use the given SAX function block to handle the parsing callback.
14578 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014579 *
Owen Taylor3473f882001-02-23 17:55:21 +000014580 * Returns the resulting document tree
14581 */
14582
14583xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014584xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014585 xmlDocPtr ret;
14586 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014587 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014588
Daniel Veillard38936062004-11-04 17:45:11 +000014589 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014590
14591
14592 ctxt = xmlCreateDocParserCtxt(cur);
14593 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014594 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014595 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014596 ctxt->sax = sax;
14597 ctxt->userData = NULL;
14598 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014599 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014600
14601 xmlParseDocument(ctxt);
14602 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14603 else {
14604 ret = NULL;
14605 xmlFreeDoc(ctxt->myDoc);
14606 ctxt->myDoc = NULL;
14607 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014608 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014609 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014610 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014611
Owen Taylor3473f882001-02-23 17:55:21 +000014612 return(ret);
14613}
14614
14615/**
14616 * xmlParseDoc:
14617 * @cur: a pointer to an array of xmlChar
14618 *
14619 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014620 *
Owen Taylor3473f882001-02-23 17:55:21 +000014621 * Returns the resulting document tree
14622 */
14623
14624xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014625xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014626 return(xmlSAXParseDoc(NULL, cur, 0));
14627}
Daniel Veillard81273902003-09-30 00:43:48 +000014628#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014629
Daniel Veillard81273902003-09-30 00:43:48 +000014630#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014631/************************************************************************
14632 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014633 * Specific function to keep track of entities references *
14634 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014635 * *
14636 ************************************************************************/
14637
14638static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14639
14640/**
14641 * xmlAddEntityReference:
14642 * @ent : A valid entity
14643 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014644 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014645 *
14646 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14647 */
14648static void
14649xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14650 xmlNodePtr lastNode)
14651{
14652 if (xmlEntityRefFunc != NULL) {
14653 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14654 }
14655}
14656
14657
14658/**
14659 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014660 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014661 *
14662 * Set the function to call call back when a xml reference has been made
14663 */
14664void
14665xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14666{
14667 xmlEntityRefFunc = func;
14668}
Daniel Veillard81273902003-09-30 00:43:48 +000014669#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014670
14671/************************************************************************
14672 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014673 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014674 * *
14675 ************************************************************************/
14676
14677#ifdef LIBXML_XPATH_ENABLED
14678#include <libxml/xpath.h>
14679#endif
14680
Daniel Veillardffa3c742005-07-21 13:24:09 +000014681extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014682static int xmlParserInitialized = 0;
14683
14684/**
14685 * xmlInitParser:
14686 *
14687 * Initialization function for the XML parser.
14688 * This is not reentrant. Call once before processing in case of
14689 * use in multithreaded programs.
14690 */
14691
14692void
14693xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014694 if (xmlParserInitialized != 0)
14695 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014696
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014697#ifdef LIBXML_THREAD_ENABLED
14698 __xmlGlobalInitMutexLock();
14699 if (xmlParserInitialized == 0) {
14700#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014701 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014702 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014703 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14704 (xmlGenericError == NULL))
14705 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014706 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014707 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014708 xmlInitCharEncodingHandlers();
14709 xmlDefaultSAXHandlerInit();
14710 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014711#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014712 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014713#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014714#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014715 htmlInitAutoClose();
14716 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014717#endif
14718#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014719 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014720#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014721 xmlParserInitialized = 1;
14722#ifdef LIBXML_THREAD_ENABLED
14723 }
14724 __xmlGlobalInitMutexUnlock();
14725#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014726}
14727
14728/**
14729 * xmlCleanupParser:
14730 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014731 * This function name is somewhat misleading. It does not clean up
14732 * parser state, it cleans up memory allocated by the library itself.
14733 * It is a cleanup function for the XML library. It tries to reclaim all
14734 * related global memory allocated for the library processing.
14735 * It doesn't deallocate any document related memory. One should
14736 * call xmlCleanupParser() only when the process has finished using
14737 * the library and all XML/HTML documents built with it.
14738 * See also xmlInitParser() which has the opposite function of preparing
14739 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014740 *
14741 * WARNING: if your application is multithreaded or has plugin support
14742 * calling this may crash the application if another thread or
14743 * a plugin is still using libxml2. It's sometimes very hard to
14744 * guess if libxml2 is in use in the application, some libraries
14745 * or plugins may use it without notice. In case of doubt abstain
14746 * from calling this function or do it just before calling exit()
14747 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014748 */
14749
14750void
14751xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014752 if (!xmlParserInitialized)
14753 return;
14754
Owen Taylor3473f882001-02-23 17:55:21 +000014755 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014756#ifdef LIBXML_CATALOG_ENABLED
14757 xmlCatalogCleanup();
14758#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014759 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014760 xmlCleanupInputCallbacks();
14761#ifdef LIBXML_OUTPUT_ENABLED
14762 xmlCleanupOutputCallbacks();
14763#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014764#ifdef LIBXML_SCHEMAS_ENABLED
14765 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014766 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014767#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014768 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014769 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014770 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014771 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014772 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014773}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014774
14775/************************************************************************
14776 * *
14777 * New set (2.6.0) of simpler and more flexible APIs *
14778 * *
14779 ************************************************************************/
14780
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can safely be used in an unbraced if/else.
 * Invocations must be terminated with a semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14792
14793/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014794 * xmlCtxtReset:
14795 * @ctxt: an XML parser context
14796 *
14797 * Reset a parser context
14798 */
14799void
14800xmlCtxtReset(xmlParserCtxtPtr ctxt)
14801{
14802 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014803 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014804
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014805 if (ctxt == NULL)
14806 return;
14807
14808 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014809
14810 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14811 xmlFreeInputStream(input);
14812 }
14813 ctxt->inputNr = 0;
14814 ctxt->input = NULL;
14815
14816 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014817 if (ctxt->spaceTab != NULL) {
14818 ctxt->spaceTab[0] = -1;
14819 ctxt->space = &ctxt->spaceTab[0];
14820 } else {
14821 ctxt->space = NULL;
14822 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014823
14824
14825 ctxt->nodeNr = 0;
14826 ctxt->node = NULL;
14827
14828 ctxt->nameNr = 0;
14829 ctxt->name = NULL;
14830
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014831 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014832 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014833 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014834 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014835 DICT_FREE(ctxt->directory);
14836 ctxt->directory = NULL;
14837 DICT_FREE(ctxt->extSubURI);
14838 ctxt->extSubURI = NULL;
14839 DICT_FREE(ctxt->extSubSystem);
14840 ctxt->extSubSystem = NULL;
14841 if (ctxt->myDoc != NULL)
14842 xmlFreeDoc(ctxt->myDoc);
14843 ctxt->myDoc = NULL;
14844
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014845 ctxt->standalone = -1;
14846 ctxt->hasExternalSubset = 0;
14847 ctxt->hasPErefs = 0;
14848 ctxt->html = 0;
14849 ctxt->external = 0;
14850 ctxt->instate = XML_PARSER_START;
14851 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014852
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014853 ctxt->wellFormed = 1;
14854 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014855 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014856 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014857#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014858 ctxt->vctxt.userData = ctxt;
14859 ctxt->vctxt.error = xmlParserValidityError;
14860 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014861#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014862 ctxt->record_info = 0;
14863 ctxt->nbChars = 0;
14864 ctxt->checkIndex = 0;
14865 ctxt->inSubset = 0;
14866 ctxt->errNo = XML_ERR_OK;
14867 ctxt->depth = 0;
14868 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14869 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014870 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014871 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014872 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014873 xmlInitNodeInfoSeq(&ctxt->node_seq);
14874
14875 if (ctxt->attsDefault != NULL) {
14876 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14877 ctxt->attsDefault = NULL;
14878 }
14879 if (ctxt->attsSpecial != NULL) {
14880 xmlHashFree(ctxt->attsSpecial, NULL);
14881 ctxt->attsSpecial = NULL;
14882 }
14883
Daniel Veillard4432df22003-09-28 18:58:27 +000014884#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014885 if (ctxt->catalogs != NULL)
14886 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014887#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014888 if (ctxt->lastError.code != XML_ERR_OK)
14889 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014890}
14891
14892/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014893 * xmlCtxtResetPush:
14894 * @ctxt: an XML parser context
14895 * @chunk: a pointer to an array of chars
14896 * @size: number of chars in the array
14897 * @filename: an optional file name or URI
14898 * @encoding: the document encoding, or NULL
14899 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014900 * Reset a push parser context
14901 *
14902 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014903 */
14904int
14905xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14906 int size, const char *filename, const char *encoding)
14907{
14908 xmlParserInputPtr inputStream;
14909 xmlParserInputBufferPtr buf;
14910 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14911
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014912 if (ctxt == NULL)
14913 return(1);
14914
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014915 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14916 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14917
14918 buf = xmlAllocParserInputBuffer(enc);
14919 if (buf == NULL)
14920 return(1);
14921
14922 if (ctxt == NULL) {
14923 xmlFreeParserInputBuffer(buf);
14924 return(1);
14925 }
14926
14927 xmlCtxtReset(ctxt);
14928
14929 if (ctxt->pushTab == NULL) {
14930 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14931 sizeof(xmlChar *));
14932 if (ctxt->pushTab == NULL) {
14933 xmlErrMemory(ctxt, NULL);
14934 xmlFreeParserInputBuffer(buf);
14935 return(1);
14936 }
14937 }
14938
14939 if (filename == NULL) {
14940 ctxt->directory = NULL;
14941 } else {
14942 ctxt->directory = xmlParserGetDirectory(filename);
14943 }
14944
14945 inputStream = xmlNewInputStream(ctxt);
14946 if (inputStream == NULL) {
14947 xmlFreeParserInputBuffer(buf);
14948 return(1);
14949 }
14950
14951 if (filename == NULL)
14952 inputStream->filename = NULL;
14953 else
14954 inputStream->filename = (char *)
14955 xmlCanonicPath((const xmlChar *) filename);
14956 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014957 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014958
14959 inputPush(ctxt, inputStream);
14960
14961 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14962 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014963 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14964 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014965
14966 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14967
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014968 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014969#ifdef DEBUG_PUSH
14970 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14971#endif
14972 }
14973
14974 if (encoding != NULL) {
14975 xmlCharEncodingHandlerPtr hdlr;
14976
Daniel Veillard37334572008-07-31 08:20:02 +000014977 if (ctxt->encoding != NULL)
14978 xmlFree((xmlChar *) ctxt->encoding);
14979 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14980
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014981 hdlr = xmlFindCharEncodingHandler(encoding);
14982 if (hdlr != NULL) {
14983 xmlSwitchToEncoding(ctxt, hdlr);
14984 } else {
14985 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14986 "Unsupported encoding %s\n", BAD_CAST encoding);
14987 }
14988 } else if (enc != XML_CHAR_ENCODING_NONE) {
14989 xmlSwitchEncoding(ctxt, enc);
14990 }
14991
14992 return(0);
14993}
14994
Daniel Veillard37334572008-07-31 08:20:02 +000014995
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014996/**
Daniel Veillard37334572008-07-31 08:20:02 +000014997 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014998 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014999 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015000 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015001 *
15002 * Applies the options to the parser context
15003 *
15004 * Returns 0 in case of success, the set of unknown or unimplemented options
15005 * in case of error.
15006 */
Daniel Veillard37334572008-07-31 08:20:02 +000015007static int
15008xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015009{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015010 if (ctxt == NULL)
15011 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015012 if (encoding != NULL) {
15013 if (ctxt->encoding != NULL)
15014 xmlFree((xmlChar *) ctxt->encoding);
15015 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15016 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015017 if (options & XML_PARSE_RECOVER) {
15018 ctxt->recovery = 1;
15019 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015020 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015021 } else
15022 ctxt->recovery = 0;
15023 if (options & XML_PARSE_DTDLOAD) {
15024 ctxt->loadsubset = XML_DETECT_IDS;
15025 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015026 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015027 } else
15028 ctxt->loadsubset = 0;
15029 if (options & XML_PARSE_DTDATTR) {
15030 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15031 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015032 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015033 }
15034 if (options & XML_PARSE_NOENT) {
15035 ctxt->replaceEntities = 1;
15036 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15037 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015038 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015039 } else
15040 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015041 if (options & XML_PARSE_PEDANTIC) {
15042 ctxt->pedantic = 1;
15043 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015044 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015045 } else
15046 ctxt->pedantic = 0;
15047 if (options & XML_PARSE_NOBLANKS) {
15048 ctxt->keepBlanks = 0;
15049 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15050 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015051 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015052 } else
15053 ctxt->keepBlanks = 1;
15054 if (options & XML_PARSE_DTDVALID) {
15055 ctxt->validate = 1;
15056 if (options & XML_PARSE_NOWARNING)
15057 ctxt->vctxt.warning = NULL;
15058 if (options & XML_PARSE_NOERROR)
15059 ctxt->vctxt.error = NULL;
15060 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015061 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015062 } else
15063 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015064 if (options & XML_PARSE_NOWARNING) {
15065 ctxt->sax->warning = NULL;
15066 options -= XML_PARSE_NOWARNING;
15067 }
15068 if (options & XML_PARSE_NOERROR) {
15069 ctxt->sax->error = NULL;
15070 ctxt->sax->fatalError = NULL;
15071 options -= XML_PARSE_NOERROR;
15072 }
Daniel Veillard81273902003-09-30 00:43:48 +000015073#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015074 if (options & XML_PARSE_SAX1) {
15075 ctxt->sax->startElement = xmlSAX2StartElement;
15076 ctxt->sax->endElement = xmlSAX2EndElement;
15077 ctxt->sax->startElementNs = NULL;
15078 ctxt->sax->endElementNs = NULL;
15079 ctxt->sax->initialized = 1;
15080 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015081 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015082 }
Daniel Veillard81273902003-09-30 00:43:48 +000015083#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015084 if (options & XML_PARSE_NODICT) {
15085 ctxt->dictNames = 0;
15086 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015087 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015088 } else {
15089 ctxt->dictNames = 1;
15090 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015091 if (options & XML_PARSE_NOCDATA) {
15092 ctxt->sax->cdataBlock = NULL;
15093 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015094 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015095 }
15096 if (options & XML_PARSE_NSCLEAN) {
15097 ctxt->options |= XML_PARSE_NSCLEAN;
15098 options -= XML_PARSE_NSCLEAN;
15099 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015100 if (options & XML_PARSE_NONET) {
15101 ctxt->options |= XML_PARSE_NONET;
15102 options -= XML_PARSE_NONET;
15103 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015104 if (options & XML_PARSE_COMPACT) {
15105 ctxt->options |= XML_PARSE_COMPACT;
15106 options -= XML_PARSE_COMPACT;
15107 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015108 if (options & XML_PARSE_OLD10) {
15109 ctxt->options |= XML_PARSE_OLD10;
15110 options -= XML_PARSE_OLD10;
15111 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015112 if (options & XML_PARSE_NOBASEFIX) {
15113 ctxt->options |= XML_PARSE_NOBASEFIX;
15114 options -= XML_PARSE_NOBASEFIX;
15115 }
15116 if (options & XML_PARSE_HUGE) {
15117 ctxt->options |= XML_PARSE_HUGE;
15118 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015119 if (ctxt->dict != NULL)
15120 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015121 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015122 if (options & XML_PARSE_OLDSAX) {
15123 ctxt->options |= XML_PARSE_OLDSAX;
15124 options -= XML_PARSE_OLDSAX;
15125 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015126 if (options & XML_PARSE_IGNORE_ENC) {
15127 ctxt->options |= XML_PARSE_IGNORE_ENC;
15128 options -= XML_PARSE_IGNORE_ENC;
15129 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015130 if (options & XML_PARSE_BIG_LINES) {
15131 ctxt->options |= XML_PARSE_BIG_LINES;
15132 options -= XML_PARSE_BIG_LINES;
15133 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015134 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015135 return (options);
15136}
15137
15138/**
Daniel Veillard37334572008-07-31 08:20:02 +000015139 * xmlCtxtUseOptions:
15140 * @ctxt: an XML parser context
15141 * @options: a combination of xmlParserOption
15142 *
15143 * Applies the options to the parser context
15144 *
15145 * Returns 0 in case of success, the set of unknown or unimplemented options
15146 * in case of error.
15147 */
15148int
15149xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15150{
15151 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15152}
15153
15154/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015155 * xmlDoRead:
15156 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015157 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015158 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015159 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015160 * @reuse: keep the context for reuse
15161 *
15162 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015163 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015164 * Returns the resulting document tree or NULL
15165 */
15166static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015167xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15168 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015169{
15170 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015171
15172 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015173 if (encoding != NULL) {
15174 xmlCharEncodingHandlerPtr hdlr;
15175
15176 hdlr = xmlFindCharEncodingHandler(encoding);
15177 if (hdlr != NULL)
15178 xmlSwitchToEncoding(ctxt, hdlr);
15179 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015180 if ((URL != NULL) && (ctxt->input != NULL) &&
15181 (ctxt->input->filename == NULL))
15182 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015183 xmlParseDocument(ctxt);
15184 if ((ctxt->wellFormed) || ctxt->recovery)
15185 ret = ctxt->myDoc;
15186 else {
15187 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015188 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015189 xmlFreeDoc(ctxt->myDoc);
15190 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015191 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015192 ctxt->myDoc = NULL;
15193 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015194 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015195 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015196
15197 return (ret);
15198}
15199
15200/**
15201 * xmlReadDoc:
15202 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015203 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015204 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015205 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015206 *
15207 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015208 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015209 * Returns the resulting document tree
15210 */
15211xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015212xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015213{
15214 xmlParserCtxtPtr ctxt;
15215
15216 if (cur == NULL)
15217 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015218 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015219
15220 ctxt = xmlCreateDocParserCtxt(cur);
15221 if (ctxt == NULL)
15222 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015223 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015224}
15225
15226/**
15227 * xmlReadFile:
15228 * @filename: a file or URL
15229 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015230 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015231 *
15232 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015233 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015234 * Returns the resulting document tree
15235 */
15236xmlDocPtr
15237xmlReadFile(const char *filename, const char *encoding, int options)
15238{
15239 xmlParserCtxtPtr ctxt;
15240
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015241 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015242 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015243 if (ctxt == NULL)
15244 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015245 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015246}
15247
15248/**
15249 * xmlReadMemory:
15250 * @buffer: a pointer to a char array
15251 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015252 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015253 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015254 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015255 *
15256 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015257 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015258 * Returns the resulting document tree
15259 */
15260xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015261xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015262{
15263 xmlParserCtxtPtr ctxt;
15264
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015265 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15267 if (ctxt == NULL)
15268 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015269 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015270}
15271
15272/**
15273 * xmlReadFd:
15274 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015275 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015276 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015277 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015278 *
15279 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015280 * NOTE that the file descriptor will not be closed when the
15281 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015282 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015283 * Returns the resulting document tree
15284 */
15285xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015286xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015287{
15288 xmlParserCtxtPtr ctxt;
15289 xmlParserInputBufferPtr input;
15290 xmlParserInputPtr stream;
15291
15292 if (fd < 0)
15293 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015294 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015295
15296 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15297 if (input == NULL)
15298 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015299 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015300 ctxt = xmlNewParserCtxt();
15301 if (ctxt == NULL) {
15302 xmlFreeParserInputBuffer(input);
15303 return (NULL);
15304 }
15305 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15306 if (stream == NULL) {
15307 xmlFreeParserInputBuffer(input);
15308 xmlFreeParserCtxt(ctxt);
15309 return (NULL);
15310 }
15311 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015312 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015313}
15314
15315/**
15316 * xmlReadIO:
15317 * @ioread: an I/O read function
15318 * @ioclose: an I/O close function
15319 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015320 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015321 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015322 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015323 *
15324 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015325 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015326 * Returns the resulting document tree
15327 */
15328xmlDocPtr
15329xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015330 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015331{
15332 xmlParserCtxtPtr ctxt;
15333 xmlParserInputBufferPtr input;
15334 xmlParserInputPtr stream;
15335
15336 if (ioread == NULL)
15337 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015338 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015339
15340 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15341 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015342 if (input == NULL) {
15343 if (ioclose != NULL)
15344 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015345 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015346 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015347 ctxt = xmlNewParserCtxt();
15348 if (ctxt == NULL) {
15349 xmlFreeParserInputBuffer(input);
15350 return (NULL);
15351 }
15352 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15353 if (stream == NULL) {
15354 xmlFreeParserInputBuffer(input);
15355 xmlFreeParserCtxt(ctxt);
15356 return (NULL);
15357 }
15358 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015359 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015360}
15361
15362/**
15363 * xmlCtxtReadDoc:
15364 * @ctxt: an XML parser context
15365 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015366 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015367 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015368 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015369 *
15370 * parse an XML in-memory document and build a tree.
15371 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015372 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015373 * Returns the resulting document tree
15374 */
15375xmlDocPtr
15376xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015377 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015378{
15379 xmlParserInputPtr stream;
15380
15381 if (cur == NULL)
15382 return (NULL);
15383 if (ctxt == NULL)
15384 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015385 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015386
15387 xmlCtxtReset(ctxt);
15388
15389 stream = xmlNewStringInputStream(ctxt, cur);
15390 if (stream == NULL) {
15391 return (NULL);
15392 }
15393 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015394 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015395}
15396
15397/**
15398 * xmlCtxtReadFile:
15399 * @ctxt: an XML parser context
15400 * @filename: a file or URL
15401 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015402 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015403 *
15404 * parse an XML file from the filesystem or the network.
15405 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015406 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015407 * Returns the resulting document tree
15408 */
15409xmlDocPtr
15410xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15411 const char *encoding, int options)
15412{
15413 xmlParserInputPtr stream;
15414
15415 if (filename == NULL)
15416 return (NULL);
15417 if (ctxt == NULL)
15418 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015419 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420
15421 xmlCtxtReset(ctxt);
15422
Daniel Veillard29614c72004-11-26 10:47:26 +000015423 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015424 if (stream == NULL) {
15425 return (NULL);
15426 }
15427 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015428 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015429}
15430
15431/**
15432 * xmlCtxtReadMemory:
15433 * @ctxt: an XML parser context
15434 * @buffer: a pointer to a char array
15435 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015436 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015437 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015438 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015439 *
15440 * parse an XML in-memory document and build a tree.
15441 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015442 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015443 * Returns the resulting document tree
15444 */
15445xmlDocPtr
15446xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015447 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015448{
15449 xmlParserInputBufferPtr input;
15450 xmlParserInputPtr stream;
15451
15452 if (ctxt == NULL)
15453 return (NULL);
15454 if (buffer == NULL)
15455 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015456 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015457
15458 xmlCtxtReset(ctxt);
15459
15460 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15461 if (input == NULL) {
15462 return(NULL);
15463 }
15464
15465 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15466 if (stream == NULL) {
15467 xmlFreeParserInputBuffer(input);
15468 return(NULL);
15469 }
15470
15471 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015472 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015473}
15474
15475/**
15476 * xmlCtxtReadFd:
15477 * @ctxt: an XML parser context
15478 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015479 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015480 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015481 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015482 *
15483 * parse an XML from a file descriptor and build a tree.
15484 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015485 * NOTE that the file descriptor will not be closed when the
15486 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015487 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015488 * Returns the resulting document tree
15489 */
15490xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015491xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15492 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015493{
15494 xmlParserInputBufferPtr input;
15495 xmlParserInputPtr stream;
15496
15497 if (fd < 0)
15498 return (NULL);
15499 if (ctxt == NULL)
15500 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015501 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015502
15503 xmlCtxtReset(ctxt);
15504
15505
15506 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15507 if (input == NULL)
15508 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015509 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015510 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15511 if (stream == NULL) {
15512 xmlFreeParserInputBuffer(input);
15513 return (NULL);
15514 }
15515 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015516 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015517}
15518
15519/**
15520 * xmlCtxtReadIO:
15521 * @ctxt: an XML parser context
15522 * @ioread: an I/O read function
15523 * @ioclose: an I/O close function
15524 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015525 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015526 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015527 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015528 *
15529 * parse an XML document from I/O functions and source and build a tree.
15530 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015531 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015532 * Returns the resulting document tree
15533 */
15534xmlDocPtr
15535xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15536 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015537 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015538 const char *encoding, int options)
15539{
15540 xmlParserInputBufferPtr input;
15541 xmlParserInputPtr stream;
15542
15543 if (ioread == NULL)
15544 return (NULL);
15545 if (ctxt == NULL)
15546 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015547 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015548
15549 xmlCtxtReset(ctxt);
15550
15551 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15552 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015553 if (input == NULL) {
15554 if (ioclose != NULL)
15555 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015556 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015557 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015558 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15559 if (stream == NULL) {
15560 xmlFreeParserInputBuffer(input);
15561 return (NULL);
15562 }
15563 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015564 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015565}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015566
15567#define bottom_parser
15568#include "elfgcchack.h"