blob: 28d0cd2d25b5f41cdd0b32ad374d2f0e413a75b4 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Stéphane Michaut454e3972017-08-28 14:30:43 +020033/* To avoid EBCDIC trouble when parsing on zOS */
34#if defined(__MVS__)
35#pragma convert("ISO8859-1")
36#endif
37
Daniel Veillard34ce8be2002-03-18 19:37:11 +000038#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000039#include "libxml.h"
40
Daniel Veillard3c5ed912002-01-08 10:36:16 +000041#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000042#define XML_DIR_SEP '\\'
43#else
Owen Taylor3473f882001-02-23 17:55:21 +000044#define XML_DIR_SEP '/'
45#endif
46
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080048#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000050#include <stdarg.h>
Nick Wellnhofer855c19e2017-06-01 01:04:08 +020051#include <stddef.h>
Owen Taylor3473f882001-02-23 17:55:21 +000052#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000053#include <libxml/threads.h>
54#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000055#include <libxml/tree.h>
56#include <libxml/parser.h>
57#include <libxml/parserInternals.h>
58#include <libxml/valid.h>
59#include <libxml/entities.h>
60#include <libxml/xmlerror.h>
61#include <libxml/encoding.h>
62#include <libxml/xmlIO.h>
63#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000064#ifdef LIBXML_CATALOG_ENABLED
65#include <libxml/catalog.h>
66#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000067#ifdef LIBXML_SCHEMAS_ENABLED
68#include <libxml/xmlschemastypes.h>
69#include <libxml/relaxng.h>
70#endif
Owen Taylor3473f882001-02-23 17:55:21 +000071#ifdef HAVE_CTYPE_H
72#include <ctype.h>
73#endif
74#ifdef HAVE_STDLIB_H
75#include <stdlib.h>
76#endif
77#ifdef HAVE_SYS_STAT_H
78#include <sys/stat.h>
79#endif
80#ifdef HAVE_FCNTL_H
81#include <fcntl.h>
82#endif
83#ifdef HAVE_UNISTD_H
84#include <unistd.h>
85#endif
86#ifdef HAVE_ZLIB_H
87#include <zlib.h>
88#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020089#ifdef HAVE_LZMA_H
90#include <lzma.h>
91#endif
Owen Taylor3473f882001-02-23 17:55:21 +000092
Daniel Veillard768eb3b2012-07-16 14:19:49 +080093#include "buf.h"
94#include "enc.h"
95
Daniel Veillard0161e632008-08-28 15:36:32 +000096static void
97xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
98
Rob Richards9c0aa472009-03-26 18:10:19 +000099static xmlParserCtxtPtr
100xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
101 const xmlChar *base, xmlParserCtxtPtr pctx);
102
Daniel Veillard28cd9cb2015-11-20 14:55:30 +0800103static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
Daniel Veillard0161e632008-08-28 15:36:32 +0000105/************************************************************************
106 * *
107 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
108 * *
109 ************************************************************************/
110
/* Size thresholds used by the entity expansion safety checks. */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
121
122/*
123 * xmlParserEntityCheck
124 *
125 * Function to check non-linear entity expansion behaviour
126 * This is here to detect and stop exponential linear entity expansion
127 * This is not a limitation of the parser but a safety
128 * boundary feature. It can be disabled with the XML_PARSE_HUGE
129 * parser option.
130 */
131static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800132xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000134{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800135 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000136
137 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
138 return (0);
139 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
140 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800141
142 /*
143 * This may look absurd but is needed to detect
144 * entities problems
145 */
146 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800147 (ent->content != NULL) && (ent->checked == 0) &&
148 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149 unsigned long oldnbent = ctxt->nbentities;
150 xmlChar *rep;
151
152 ent->checked = 1;
153
Peter Simons8f30bdf2016-04-15 11:56:55 +0200154 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800155 rep = xmlStringDecodeEntities(ctxt, ent->content,
156 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200157 --ctxt->depth;
Daniel Veillardbdd66182016-05-23 12:27:58 +0800158 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
159 ent->content[0] = 0;
160 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800161
162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
163 if (rep != NULL) {
164 if (xmlStrchr(rep, '<'))
165 ent->checked |= 1;
166 xmlFree(rep);
167 rep = NULL;
168 }
169 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800170 if (replacement != 0) {
171 if (replacement < XML_MAX_TEXT_LENGTH)
172 return(0);
173
174 /*
175 * If the volume of entity copy reaches 10 times the
176 * amount of parsed data and over the large text threshold
177 * then that's very likely to be an abuse.
178 */
179 if (ctxt->input != NULL) {
180 consumed = ctxt->input->consumed +
181 (ctxt->input->cur - ctxt->input->base);
182 }
183 consumed += ctxt->sizeentities;
184
185 if (replacement < XML_PARSER_NON_LINEAR * consumed)
186 return(0);
187 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000188 /*
189 * Do the check based on the replacement size of the entity
190 */
191 if (size < XML_PARSER_BIG_ENTITY)
192 return(0);
193
194 /*
195 * A limit on the amount of text data reasonably used
196 */
197 if (ctxt->input != NULL) {
198 consumed = ctxt->input->consumed +
199 (ctxt->input->cur - ctxt->input->base);
200 }
201 consumed += ctxt->sizeentities;
202
203 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
204 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
205 return (0);
206 } else if (ent != NULL) {
207 /*
208 * use the number of parsed entities in the replacement
209 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800210 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212 /*
213 * The amount of data parsed counting entities size only once
214 */
215 if (ctxt->input != NULL) {
216 consumed = ctxt->input->consumed +
217 (ctxt->input->cur - ctxt->input->base);
218 }
219 consumed += ctxt->sizeentities;
220
221 /*
222 * Check the density of entities for the amount of data
223 * knowing an entity reference will take at least 3 bytes
224 */
225 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
226 return (0);
227 } else {
228 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800229 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800231 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
232 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
233 (ctxt->nbentities <= 10000))
234 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000235 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000236 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
237 return (1);
238}
239
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we allow to
 * process; the initial value is 256. This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000249
Daniel Veillard0fb18932003-09-07 09:14:37 +0000250
Daniel Veillard0161e632008-08-28 15:36:32 +0000251
#define SAX2 1

/* Buffer sizes, in bytes, used by the parser. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
267
/*
 * NULL-terminated list of XML prefixed PI targets allowed by W3C specs.
 *
 * Both the strings and the pointer array are const: the table is never
 * meant to be modified at run time.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
277
Daniel Veillarda07050d2003-10-19 14:46:32 +0000278
Owen Taylor3473f882001-02-23 17:55:21 +0000279/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200280static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
281 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard7d515752003-09-26 19:12:37 +0000283static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000284xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
285 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000286 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000287 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000288
Daniel Veillard37334572008-07-31 08:20:02 +0000289static int
290xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
291 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000292#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000293static void
294xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
295 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000296#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000297
Daniel Veillard7d515752003-09-26 19:12:37 +0000298static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000299xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
300 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000301
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000302static int
303xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
304
Daniel Veillarde57ec792003-09-10 10:50:59 +0000305/************************************************************************
306 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800307 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 * *
309 ************************************************************************/
310
311/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 * xmlErrAttributeDup:
313 * @ctxt: an XML parser context
314 * @prefix: the attribute prefix
315 * @localname: the attribute localname
316 *
317 * Handle a redefinition of attribute error
318 */
319static void
320xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
321 const xmlChar * localname)
322{
Daniel Veillard157fee02003-10-31 10:36:03 +0000323 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
324 (ctxt->instate == XML_PARSER_EOF))
325 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000326 if (ctxt != NULL)
327 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200328
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000329 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000330 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200331 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 (const char *) localname, NULL, NULL, 0, 0,
333 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000334 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000335 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200336 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 (const char *) prefix, (const char *) localname,
338 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
339 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000340 if (ctxt != NULL) {
341 ctxt->wellFormed = 0;
342 if (ctxt->recovery == 0)
343 ctxt->disableSAX = 1;
344 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345}
346
347/**
348 * xmlFatalErr:
349 * @ctxt: an XML parser context
350 * @error: the error number
351 * @extra: extra information string
352 *
353 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
354 */
355static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357{
358 const char *errmsg;
359
Daniel Veillard157fee02003-10-31 10:36:03 +0000360 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
361 (ctxt->instate == XML_PARSER_EOF))
362 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 switch (error) {
364 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 errmsg = "internal error";
375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800437 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 errmsg = "Fragment not allowed";
444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800461 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000463 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800464 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000466 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000473 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800474 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000475 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800477 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 break;
479 case XML_ERR_CONDSEC_INVALID_KEYWORD:
480 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800502 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800505 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000506 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000507 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800508 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000509 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800511 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000512 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000513 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000525 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000528 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000530 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000531 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800532 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000533 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800534 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800535 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800536 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000537#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000538 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800539 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000540 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000541#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800543 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000544 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000545 if (ctxt != NULL)
546 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800547 if (info == NULL) {
548 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
549 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
550 errmsg);
551 } else {
552 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
553 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
554 errmsg, info);
555 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000556 if (ctxt != NULL) {
557 ctxt->wellFormed = 0;
558 if (ctxt->recovery == 0)
559 ctxt->disableSAX = 1;
560 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000561}
562
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000563/**
564 * xmlFatalErrMsg:
565 * @ctxt: an XML parser context
566 * @error: the error number
567 * @msg: the error message
568 *
569 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
570 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800571static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000572xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
573 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574{
Daniel Veillard157fee02003-10-31 10:36:03 +0000575 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
576 (ctxt->instate == XML_PARSER_EOF))
577 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000578 if (ctxt != NULL)
579 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000580 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200581 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000582 if (ctxt != NULL) {
583 ctxt->wellFormed = 0;
584 if (ctxt->recovery == 0)
585 ctxt->disableSAX = 1;
586 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000587}
588
589/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000590 * xmlWarningMsg:
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
594 * @str1: extra data
595 * @str2: extra data
596 *
597 * Handle a warning.
598 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800599static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000600xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601 const char *msg, const xmlChar *str1, const xmlChar *str2)
602{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000603 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000604
Daniel Veillard157fee02003-10-31 10:36:03 +0000605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
607 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000608 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
609 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000610 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200611 if (ctxt != NULL) {
612 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000613 (ctxt->sax) ? ctxt->sax->warning : NULL,
614 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200619 } else {
620 __xmlRaiseError(schannel, NULL, NULL,
621 ctxt, NULL, XML_FROM_PARSER, error,
622 XML_ERR_WARNING, NULL, 0,
623 (const char *) str1, (const char *) str2, NULL, 0, 0,
624 msg, (const char *) str1, (const char *) str2);
625 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000626}
627
628/**
629 * xmlValidityError:
630 * @ctxt: an XML parser context
631 * @error: the error number
632 * @msg: the error message
633 * @str1: extra data
634 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000635 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000636 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800637static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000638xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000639 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000640{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000641 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000642
643 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
644 (ctxt->instate == XML_PARSER_EOF))
645 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000646 if (ctxt != NULL) {
647 ctxt->errNo = error;
648 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
649 schannel = ctxt->sax->serror;
650 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200651 if (ctxt != NULL) {
652 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000653 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000654 ctxt, NULL, XML_FROM_DTD, error,
655 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000656 (const char *) str2, NULL, 0, 0,
657 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000658 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200659 } else {
660 __xmlRaiseError(schannel, NULL, NULL,
661 ctxt, NULL, XML_FROM_DTD, error,
662 XML_ERR_ERROR, NULL, 0, (const char *) str1,
663 (const char *) str2, NULL, 0, 0,
664 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000665 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000666}
667
668/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000669 * xmlFatalErrMsgInt:
670 * @ctxt: an XML parser context
671 * @error: the error number
672 * @msg: the error message
673 * @val: an integer value
674 *
675 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
676 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800677static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000678xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000679 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000680{
Daniel Veillard157fee02003-10-31 10:36:03 +0000681 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
682 (ctxt->instate == XML_PARSER_EOF))
683 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000684 if (ctxt != NULL)
685 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000686 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000687 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
688 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000689 if (ctxt != NULL) {
690 ctxt->wellFormed = 0;
691 if (ctxt->recovery == 0)
692 ctxt->disableSAX = 1;
693 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000694}
695
696/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000697 * xmlFatalErrMsgStrIntStr:
698 * @ctxt: an XML parser context
699 * @error: the error number
700 * @msg: the error message
701 * @str1: an string info
702 * @val: an integer value
703 * @str2: an string info
704 *
705 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
706 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800707static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000708xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800709 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000710 const xmlChar *str2)
711{
Daniel Veillard157fee02003-10-31 10:36:03 +0000712 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
713 (ctxt->instate == XML_PARSER_EOF))
714 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000715 if (ctxt != NULL)
716 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000717 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000718 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
719 NULL, 0, (const char *) str1, (const char *) str2,
720 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000721 if (ctxt != NULL) {
722 ctxt->wellFormed = 0;
723 if (ctxt->recovery == 0)
724 ctxt->disableSAX = 1;
725 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000726}
727
728/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000729 * xmlFatalErrMsgStr:
730 * @ctxt: an XML parser context
731 * @error: the error number
732 * @msg: the error message
733 * @val: a string value
734 *
735 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
736 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800737static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000738xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000739 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000740{
Daniel Veillard157fee02003-10-31 10:36:03 +0000741 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
742 (ctxt->instate == XML_PARSER_EOF))
743 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000744 if (ctxt != NULL)
745 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000746 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000747 XML_FROM_PARSER, error, XML_ERR_FATAL,
748 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
749 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000750 if (ctxt != NULL) {
751 ctxt->wellFormed = 0;
752 if (ctxt->recovery == 0)
753 ctxt->disableSAX = 1;
754 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000755}
756
757/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000758 * xmlErrMsgStr:
759 * @ctxt: an XML parser context
760 * @error: the error number
761 * @msg: the error message
762 * @val: a string value
763 *
764 * Handle a non fatal parser error
765 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800766static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000767xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
768 const char *msg, const xmlChar * val)
769{
Daniel Veillard157fee02003-10-31 10:36:03 +0000770 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
771 (ctxt->instate == XML_PARSER_EOF))
772 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000773 if (ctxt != NULL)
774 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000775 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000776 XML_FROM_PARSER, error, XML_ERR_ERROR,
777 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
778 val);
779}
780
781/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000782 * xmlNsErr:
783 * @ctxt: an XML parser context
784 * @error: the error number
785 * @msg: the message
786 * @info1: extra information string
787 * @info2: extra information string
788 *
789 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
790 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800791static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000792xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
793 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000794 const xmlChar * info1, const xmlChar * info2,
795 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000796{
Daniel Veillard157fee02003-10-31 10:36:03 +0000797 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
798 (ctxt->instate == XML_PARSER_EOF))
799 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000800 if (ctxt != NULL)
801 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000802 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000803 XML_ERR_ERROR, NULL, 0, (const char *) info1,
804 (const char *) info2, (const char *) info3, 0, 0, msg,
805 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000806 if (ctxt != NULL)
807 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000808}
809
Daniel Veillard37334572008-07-31 08:20:02 +0000810/**
811 * xmlNsWarn
812 * @ctxt: an XML parser context
813 * @error: the error number
814 * @msg: the message
815 * @info1: extra information string
816 * @info2: extra information string
817 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800818 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000819 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800820static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000821xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
822 const char *msg,
823 const xmlChar * info1, const xmlChar * info2,
824 const xmlChar * info3)
825{
826 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
827 (ctxt->instate == XML_PARSER_EOF))
828 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000829 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
830 XML_ERR_WARNING, NULL, 0, (const char *) info1,
831 (const char *) info2, (const char *) info3, 0, 0, msg,
832 info1, info2, info3);
833}
834
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000835/************************************************************************
836 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800837 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000838 * *
839 ************************************************************************/
840
841/**
842 * xmlHasFeature:
843 * @feature: the feature to be examined
844 *
845 * Examines if the library has been compiled with a given feature.
846 *
847 * Returns a non-zero value if the feature exist, otherwise zero.
848 * Returns zero (0) if the feature does not exist or an unknown
849 * unknown feature is requested, non-zero otherwise.
850 */
851int
852xmlHasFeature(xmlFeature feature)
853{
854 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_THREAD_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_TREE_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_OUTPUT_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_PUSH_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_READER_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_PATTERN_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_WRITER_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_SAX1_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_FTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_HTTP_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_VALID_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_HTML_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_LEGACY_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_C14N_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef LIBXML_CATALOG_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_XPATH_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000951 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000952#ifdef LIBXML_XPTR_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000957 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000958#ifdef LIBXML_XINCLUDE_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000963 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000964#ifdef LIBXML_ICONV_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000969 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000970#ifdef LIBXML_ISO8859X_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000975 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000976#ifdef LIBXML_UNICODE_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000981 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000982#ifdef LIBXML_REGEXP_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000987 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000988#ifdef LIBXML_AUTOMATA_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000993 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000994#ifdef LIBXML_EXPR_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000999 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001000#ifdef LIBXML_SCHEMAS_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001005 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001006#ifdef LIBXML_SCHEMATRON_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001011 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012#ifdef LIBXML_MODULES_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001017 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001018#ifdef LIBXML_DEBUG_ENABLED
1019 return(1);
1020#else
1021 return(0);
1022#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001023 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001024#ifdef DEBUG_MEMORY_LOCATION
1025 return(1);
1026#else
1027 return(0);
1028#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001029 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001030#ifdef LIBXML_DEBUG_RUNTIME
1031 return(1);
1032#else
1033 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001034#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001035 case XML_WITH_ZLIB:
1036#ifdef LIBXML_ZLIB_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001041 case XML_WITH_LZMA:
1042#ifdef LIBXML_LZMA_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001047 case XML_WITH_ICU:
1048#ifdef LIBXML_ICU_ENABLED
1049 return(1);
1050#else
1051 return(0);
1052#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001053 default:
1054 break;
1055 }
1056 return(0);
1057}
1058
1059/************************************************************************
1060 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001061 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 * *
1063 ************************************************************************/
1064
1065/**
1066 * xmlDetectSAX2:
1067 * @ctxt: an XML parser context
1068 *
1069 * Do the SAX2 detection and specific intialization
1070 */
1071static void
1072xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1073 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001074#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001075 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1076 ((ctxt->sax->startElementNs != NULL) ||
1077 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001078#else
1079 ctxt->sax2 = 1;
1080#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001081
1082 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1083 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1084 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001085 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1086 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001087 xmlErrMemory(ctxt, NULL);
1088 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001089}
1090
Daniel Veillarde57ec792003-09-10 10:50:59 +00001091typedef struct _xmlDefAttrs xmlDefAttrs;
1092typedef xmlDefAttrs *xmlDefAttrsPtr;
1093struct _xmlDefAttrs {
1094 int nbAttrs; /* number of defaulted attributes on that element */
1095 int maxAttrs; /* the size of the array */
Nick Wellnhofer47496722017-05-31 16:46:39 +02001096#if __STDC_VERSION__ >= 199901L
1097 /* Using a C99 flexible array member avoids UBSan errors. */
1098 const xmlChar *values[]; /* array of localname/prefix/values/external */
1099#else
1100 const xmlChar *values[5];
1101#endif
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001103
1104/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001105 * xmlAttrNormalizeSpace:
1106 * @src: the source string
1107 * @dst: the target string
1108 *
1109 * Normalize the space in non CDATA attribute values:
1110 * If the attribute type is not CDATA, then the XML processor MUST further
1111 * process the normalized attribute value by discarding any leading and
1112 * trailing space (#x20) characters, and by replacing sequences of space
1113 * (#x20) characters by a single space (#x20) character.
1114 * Note that the size of dst need to be at least src, and if one doesn't need
1115 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1116 * passing src as dst is just fine.
1117 *
1118 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1119 * is needed.
1120 */
1121static xmlChar *
1122xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1123{
1124 if ((src == NULL) || (dst == NULL))
1125 return(NULL);
1126
1127 while (*src == 0x20) src++;
1128 while (*src != 0) {
1129 if (*src == 0x20) {
1130 while (*src == 0x20) src++;
1131 if (*src != 0)
1132 *dst++ = 0x20;
1133 } else {
1134 *dst++ = *src++;
1135 }
1136 }
1137 *dst = 0;
1138 if (dst == src)
1139 return(NULL);
1140 return(dst);
1141}
1142
1143/**
1144 * xmlAttrNormalizeSpace2:
1145 * @src: the source string
1146 *
1147 * Normalize the space in non CDATA attribute values, a slightly more complex
1148 * front end to avoid allocation problems when running on attribute values
1149 * coming from the input.
1150 *
1151 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1152 * is needed.
1153 */
1154static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001155xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001156{
1157 int i;
1158 int remove_head = 0;
1159 int need_realloc = 0;
1160 const xmlChar *cur;
1161
1162 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1163 return(NULL);
1164 i = *len;
1165 if (i <= 0)
1166 return(NULL);
1167
1168 cur = src;
1169 while (*cur == 0x20) {
1170 cur++;
1171 remove_head++;
1172 }
1173 while (*cur != 0) {
1174 if (*cur == 0x20) {
1175 cur++;
1176 if ((*cur == 0x20) || (*cur == 0)) {
1177 need_realloc = 1;
1178 break;
1179 }
1180 } else
1181 cur++;
1182 }
1183 if (need_realloc) {
1184 xmlChar *ret;
1185
1186 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1187 if (ret == NULL) {
1188 xmlErrMemory(ctxt, NULL);
1189 return(NULL);
1190 }
1191 xmlAttrNormalizeSpace(ret, ret);
1192 *len = (int) strlen((const char *)ret);
1193 return(ret);
1194 } else if (remove_head) {
1195 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001196 memmove(src, src + remove_head, 1 + *len);
1197 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001198 }
1199 return(NULL);
1200}
1201
1202/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001203 * xmlAddDefAttrs:
1204 * @ctxt: an XML parser context
1205 * @fullname: the element fullname
1206 * @fullattr: the attribute fullname
1207 * @value: the attribute value
1208 *
1209 * Add a defaulted attribute for an element
1210 */
1211static void
1212xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1213 const xmlChar *fullname,
1214 const xmlChar *fullattr,
1215 const xmlChar *value) {
1216 xmlDefAttrsPtr defaults;
1217 int len;
1218 const xmlChar *name;
1219 const xmlChar *prefix;
1220
Daniel Veillard6a31b832008-03-26 14:06:44 +00001221 /*
1222 * Allows to detect attribute redefinitions
1223 */
1224 if (ctxt->attsSpecial != NULL) {
1225 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1226 return;
1227 }
1228
Daniel Veillarde57ec792003-09-10 10:50:59 +00001229 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001230 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001231 if (ctxt->attsDefault == NULL)
1232 goto mem_error;
1233 }
1234
1235 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001236 * split the element name into prefix:localname , the string found
1237 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001238 */
1239 name = xmlSplitQName3(fullname, &len);
1240 if (name == NULL) {
1241 name = xmlDictLookup(ctxt->dict, fullname, -1);
1242 prefix = NULL;
1243 } else {
1244 name = xmlDictLookup(ctxt->dict, name, -1);
1245 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1246 }
1247
1248 /*
1249 * make sure there is some storage
1250 */
1251 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1252 if (defaults == NULL) {
1253 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001254 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001255 if (defaults == NULL)
1256 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001257 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001258 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001259 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1260 defaults, NULL) < 0) {
1261 xmlFree(defaults);
1262 goto mem_error;
1263 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001265 xmlDefAttrsPtr temp;
1266
1267 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001268 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001269 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001270 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001271 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001272 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001273 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1274 defaults, NULL) < 0) {
1275 xmlFree(defaults);
1276 goto mem_error;
1277 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001278 }
1279
1280 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001281 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001282 * are within the DTD and hen not associated to namespace names.
1283 */
1284 name = xmlSplitQName3(fullattr, &len);
1285 if (name == NULL) {
1286 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1287 prefix = NULL;
1288 } else {
1289 name = xmlDictLookup(ctxt->dict, name, -1);
1290 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1291 }
1292
Daniel Veillardae0765b2008-07-31 19:54:59 +00001293 defaults->values[5 * defaults->nbAttrs] = name;
1294 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001295 /* intern the string and precompute the end */
1296 len = xmlStrlen(value);
1297 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001298 defaults->values[5 * defaults->nbAttrs + 2] = value;
1299 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1300 if (ctxt->external)
1301 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1302 else
1303 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001304 defaults->nbAttrs++;
1305
1306 return;
1307
1308mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001309 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001310 return;
1311}
1312
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001313/**
1314 * xmlAddSpecialAttr:
1315 * @ctxt: an XML parser context
1316 * @fullname: the element fullname
1317 * @fullattr: the attribute fullname
1318 * @type: the attribute type
1319 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001320 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001321 */
1322static void
1323xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1324 const xmlChar *fullname,
1325 const xmlChar *fullattr,
1326 int type)
1327{
1328 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001329 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001330 if (ctxt->attsSpecial == NULL)
1331 goto mem_error;
1332 }
1333
Daniel Veillardac4118d2008-01-11 05:27:32 +00001334 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1335 return;
1336
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001337 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1338 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001339 return;
1340
1341mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001342 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001343 return;
1344}
1345
Daniel Veillard4432df22003-09-28 18:58:27 +00001346/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001347 * xmlCleanSpecialAttrCallback:
1348 *
1349 * Removes CDATA attributes from the special attribute table
1350 */
1351static void
1352xmlCleanSpecialAttrCallback(void *payload, void *data,
1353 const xmlChar *fullname, const xmlChar *fullattr,
1354 const xmlChar *unused ATTRIBUTE_UNUSED) {
1355 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1356
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001357 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001358 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1359 }
1360}
1361
1362/**
1363 * xmlCleanSpecialAttr:
1364 * @ctxt: an XML parser context
1365 *
1366 * Trim the list of attributes defined to remove all those of type
1367 * CDATA as they are not special. This call should be done when finishing
1368 * to parse the DTD and before starting to parse the document root.
1369 */
1370static void
1371xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1372{
1373 if (ctxt->attsSpecial == NULL)
1374 return;
1375
1376 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1377
1378 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1379 xmlHashFree(ctxt->attsSpecial, NULL);
1380 ctxt->attsSpecial = NULL;
1381 }
1382 return;
1383}
1384
1385/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001386 * xmlCheckLanguageID:
1387 * @lang: pointer to the string value
1388 *
1389 * Checks that the value conforms to the LanguageID production:
1390 *
1391 * NOTE: this is somewhat deprecated, those productions were removed from
1392 * the XML Second edition.
1393 *
1394 * [33] LanguageID ::= Langcode ('-' Subcode)*
1395 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1396 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1397 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1398 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1399 * [38] Subcode ::= ([a-z] | [A-Z])+
1400 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1402 *
1403 * http://www.rfc-editor.org/rfc/rfc5646.txt
1404 * langtag = language
1405 * ["-" script]
1406 * ["-" region]
1407 * *("-" variant)
1408 * *("-" extension)
1409 * ["-" privateuse]
1410 * language = 2*3ALPHA ; shortest ISO 639 code
1411 * ["-" extlang] ; sometimes followed by
1412 * ; extended language subtags
1413 * / 4ALPHA ; or reserved for future use
1414 * / 5*8ALPHA ; or registered language subtag
1415 *
1416 * extlang = 3ALPHA ; selected ISO 639 codes
1417 * *2("-" 3ALPHA) ; permanently reserved
1418 *
1419 * script = 4ALPHA ; ISO 15924 code
1420 *
1421 * region = 2ALPHA ; ISO 3166-1 code
1422 * / 3DIGIT ; UN M.49 code
1423 *
1424 * variant = 5*8alphanum ; registered variants
1425 * / (DIGIT 3alphanum)
1426 *
1427 * extension = singleton 1*("-" (2*8alphanum))
1428 *
1429 * ; Single alphanumerics
1430 * ; "x" reserved for private use
1431 * singleton = DIGIT ; 0 - 9
1432 * / %x41-57 ; A - W
1433 * / %x59-5A ; Y - Z
1434 * / %x61-77 ; a - w
1435 * / %x79-7A ; y - z
1436 *
1437 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1438 * The parser below doesn't try to cope with extension or privateuse
1439 * that could be added but that's not interoperable anyway
1440 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001441 * Returns 1 if correct 0 otherwise
1442 **/
1443int
1444xmlCheckLanguageID(const xmlChar * lang)
1445{
Daniel Veillard60587d62010-11-04 15:16:27 +01001446 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001447
1448 if (cur == NULL)
1449 return (0);
1450 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001451 ((cur[0] == 'I') && (cur[1] == '-')) ||
1452 ((cur[0] == 'x') && (cur[1] == '-')) ||
1453 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001454 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001455 * Still allow IANA code and user code which were coming
1456 * from the previous version of the XML-1.0 specification
1457 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001458 */
1459 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001460 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001461 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1462 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001463 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001464 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001465 nxt = cur;
1466 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1467 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1468 nxt++;
1469 if (nxt - cur >= 4) {
1470 /*
1471 * Reserved
1472 */
1473 if ((nxt - cur > 8) || (nxt[0] != 0))
1474 return(0);
1475 return(1);
1476 }
1477 if (nxt - cur < 2)
1478 return(0);
1479 /* we got an ISO 639 code */
1480 if (nxt[0] == 0)
1481 return(1);
1482 if (nxt[0] != '-')
1483 return(0);
1484
1485 nxt++;
1486 cur = nxt;
1487 /* now we can have extlang or script or region or variant */
1488 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1489 goto region_m49;
1490
1491 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1492 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1493 nxt++;
1494 if (nxt - cur == 4)
1495 goto script;
1496 if (nxt - cur == 2)
1497 goto region;
1498 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1499 goto variant;
1500 if (nxt - cur != 3)
1501 return(0);
1502 /* we parsed an extlang */
1503 if (nxt[0] == 0)
1504 return(1);
1505 if (nxt[0] != '-')
1506 return(0);
1507
1508 nxt++;
1509 cur = nxt;
1510 /* now we can have script or region or variant */
1511 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1512 goto region_m49;
1513
1514 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1515 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1516 nxt++;
1517 if (nxt - cur == 2)
1518 goto region;
1519 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1520 goto variant;
1521 if (nxt - cur != 4)
1522 return(0);
1523 /* we parsed a script */
1524script:
1525 if (nxt[0] == 0)
1526 return(1);
1527 if (nxt[0] != '-')
1528 return(0);
1529
1530 nxt++;
1531 cur = nxt;
1532 /* now we can have region or variant */
1533 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1534 goto region_m49;
1535
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538 nxt++;
1539
1540 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1541 goto variant;
1542 if (nxt - cur != 2)
1543 return(0);
1544 /* we parsed a region */
1545region:
1546 if (nxt[0] == 0)
1547 return(1);
1548 if (nxt[0] != '-')
1549 return(0);
1550
1551 nxt++;
1552 cur = nxt;
1553 /* now we can just have a variant */
1554 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1555 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1556 nxt++;
1557
1558 if ((nxt - cur < 5) || (nxt - cur > 8))
1559 return(0);
1560
1561 /* we parsed a variant */
1562variant:
1563 if (nxt[0] == 0)
1564 return(1);
1565 if (nxt[0] != '-')
1566 return(0);
1567 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001568 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001569
1570region_m49:
1571 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1572 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1573 nxt += 3;
1574 goto region;
1575 }
1576 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001577}
1578
Owen Taylor3473f882001-02-23 17:55:21 +00001579/************************************************************************
1580 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001581 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001582 * *
1583 ************************************************************************/
1584
Daniel Veillard8ed10722009-08-20 19:17:36 +02001585static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1586 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001587
Daniel Veillard0fb18932003-09-07 09:14:37 +00001588#ifdef SAX2
1589/**
1590 * nsPush:
1591 * @ctxt: an XML parser context
1592 * @prefix: the namespace prefix or NULL
1593 * @URL: the namespace name
1594 *
1595 * Pushes a new parser namespace on top of the ns stack
1596 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001597 * Returns -1 in case of error, -2 if the namespace should be discarded
1598 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001599 */
1600static int
1601nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1602{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001603 if (ctxt->options & XML_PARSE_NSCLEAN) {
1604 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001605 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001606 if (ctxt->nsTab[i] == prefix) {
1607 /* in scope */
1608 if (ctxt->nsTab[i + 1] == URL)
1609 return(-2);
1610 /* out of scope keep it */
1611 break;
1612 }
1613 }
1614 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001615 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1616 ctxt->nsMax = 10;
1617 ctxt->nsNr = 0;
1618 ctxt->nsTab = (const xmlChar **)
1619 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1620 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001621 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001622 ctxt->nsMax = 0;
1623 return (-1);
1624 }
1625 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001626 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001627 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001628 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1629 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1630 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001631 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001632 ctxt->nsMax /= 2;
1633 return (-1);
1634 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001635 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 }
1637 ctxt->nsTab[ctxt->nsNr++] = prefix;
1638 ctxt->nsTab[ctxt->nsNr++] = URL;
1639 return (ctxt->nsNr);
1640}
1641/**
1642 * nsPop:
1643 * @ctxt: an XML parser context
1644 * @nr: the number to pop
1645 *
1646 * Pops the top @nr parser prefix/namespace from the ns stack
1647 *
1648 * Returns the number of namespaces removed
1649 */
1650static int
1651nsPop(xmlParserCtxtPtr ctxt, int nr)
1652{
1653 int i;
1654
1655 if (ctxt->nsTab == NULL) return(0);
1656 if (ctxt->nsNr < nr) {
1657 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1658 nr = ctxt->nsNr;
1659 }
1660 if (ctxt->nsNr <= 0)
1661 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001662
Daniel Veillard0fb18932003-09-07 09:14:37 +00001663 for (i = 0;i < nr;i++) {
1664 ctxt->nsNr--;
1665 ctxt->nsTab[ctxt->nsNr] = NULL;
1666 }
1667 return(nr);
1668}
1669#endif
1670
1671static int
1672xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1673 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001674 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001675 int maxatts;
1676
1677 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001679 atts = (const xmlChar **)
1680 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001681 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001682 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001683 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001686 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 } else if (nr + 5 > ctxt->maxatts) {
1688 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001689 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1690 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001691 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001693 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1694 (maxatts / 5) * sizeof(int));
1695 if (attallocs == NULL) goto mem_error;
1696 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001697 ctxt->maxatts = maxatts;
1698 }
1699 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001700mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001701 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001702 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001703}
1704
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001705/**
1706 * inputPush:
1707 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001708 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001709 *
1710 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001711 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001712 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001713 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001714int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1716{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001717 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001718 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001719 if (ctxt->inputNr >= ctxt->inputMax) {
1720 ctxt->inputMax *= 2;
1721 ctxt->inputTab =
1722 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1723 ctxt->inputMax *
1724 sizeof(ctxt->inputTab[0]));
1725 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001726 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001727 xmlFreeInputStream(value);
1728 ctxt->inputMax /= 2;
1729 value = NULL;
1730 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001731 }
1732 }
1733 ctxt->inputTab[ctxt->inputNr] = value;
1734 ctxt->input = value;
1735 return (ctxt->inputNr++);
1736}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001737/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001738 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001739 * @ctxt: an XML parser context
1740 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001742 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001743 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001744 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001745xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001746inputPop(xmlParserCtxtPtr ctxt)
1747{
1748 xmlParserInputPtr ret;
1749
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001750 if (ctxt == NULL)
1751 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001752 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001753 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001754 ctxt->inputNr--;
1755 if (ctxt->inputNr > 0)
1756 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1757 else
1758 ctxt->input = NULL;
1759 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001760 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001761 return (ret);
1762}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001763/**
1764 * nodePush:
1765 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001766 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001767 *
1768 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001769 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001770 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001771 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001772int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001773nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1774{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001775 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001776 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001777 xmlNodePtr *tmp;
1778
1779 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1780 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001781 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001782 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001783 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001784 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001785 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001786 ctxt->nodeTab = tmp;
1787 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001788 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001789 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1790 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001791 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001792 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001793 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001794 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001795 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001796 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001797 ctxt->nodeTab[ctxt->nodeNr] = value;
1798 ctxt->node = value;
1799 return (ctxt->nodeNr++);
1800}
Daniel Veillard8915c152008-08-26 13:05:34 +00001801
Daniel Veillard1c732d22002-11-30 11:22:59 +00001802/**
1803 * nodePop:
1804 * @ctxt: an XML parser context
1805 *
1806 * Pops the top element node from the node stack
1807 *
1808 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001809 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001810xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811nodePop(xmlParserCtxtPtr ctxt)
1812{
1813 xmlNodePtr ret;
1814
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001815 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001816 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001817 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818 ctxt->nodeNr--;
1819 if (ctxt->nodeNr > 0)
1820 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1821 else
1822 ctxt->node = NULL;
1823 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001824 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001825 return (ret);
1826}
Daniel Veillarda2351322004-06-27 12:08:10 +00001827
1828#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001829/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001830 * nameNsPush:
1831 * @ctxt: an XML parser context
1832 * @value: the element name
1833 * @prefix: the element prefix
1834 * @URI: the element namespace name
1835 *
1836 * Pushes a new element name/prefix/URL on top of the name stack
1837 *
1838 * Returns -1 in case of error, the index in the stack otherwise
1839 */
1840static int
1841nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1842 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1843{
1844 if (ctxt->nameNr >= ctxt->nameMax) {
1845 const xmlChar * *tmp;
1846 void **tmp2;
1847 ctxt->nameMax *= 2;
1848 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1849 ctxt->nameMax *
1850 sizeof(ctxt->nameTab[0]));
1851 if (tmp == NULL) {
1852 ctxt->nameMax /= 2;
1853 goto mem_error;
1854 }
1855 ctxt->nameTab = tmp;
1856 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1857 ctxt->nameMax * 3 *
1858 sizeof(ctxt->pushTab[0]));
1859 if (tmp2 == NULL) {
1860 ctxt->nameMax /= 2;
1861 goto mem_error;
1862 }
1863 ctxt->pushTab = tmp2;
1864 }
1865 ctxt->nameTab[ctxt->nameNr] = value;
1866 ctxt->name = value;
1867 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1868 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001869 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 return (ctxt->nameNr++);
1871mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001872 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001873 return (-1);
1874}
1875/**
1876 * nameNsPop:
1877 * @ctxt: an XML parser context
1878 *
1879 * Pops the top element/prefix/URI name from the name stack
1880 *
1881 * Returns the name just removed
1882 */
1883static const xmlChar *
1884nameNsPop(xmlParserCtxtPtr ctxt)
1885{
1886 const xmlChar *ret;
1887
1888 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001889 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001890 ctxt->nameNr--;
1891 if (ctxt->nameNr > 0)
1892 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1893 else
1894 ctxt->name = NULL;
1895 ret = ctxt->nameTab[ctxt->nameNr];
1896 ctxt->nameTab[ctxt->nameNr] = NULL;
1897 return (ret);
1898}
Daniel Veillarda2351322004-06-27 12:08:10 +00001899#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001900
1901/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 * namePush:
1903 * @ctxt: an XML parser context
1904 * @value: the element name
1905 *
1906 * Pushes a new element name on top of the name stack
1907 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001908 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001910int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001911namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001912{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001913 if (ctxt == NULL) return (-1);
1914
Daniel Veillard1c732d22002-11-30 11:22:59 +00001915 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001916 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001917 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001918 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001919 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001920 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001921 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001922 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001923 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001924 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001925 }
1926 ctxt->nameTab[ctxt->nameNr] = value;
1927 ctxt->name = value;
1928 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001929mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001930 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001931 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001932}
1933/**
1934 * namePop:
1935 * @ctxt: an XML parser context
1936 *
1937 * Pops the top element name from the name stack
1938 *
1939 * Returns the name just removed
1940 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001941const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001942namePop(xmlParserCtxtPtr ctxt)
1943{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001944 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001945
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001946 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1947 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001948 ctxt->nameNr--;
1949 if (ctxt->nameNr > 0)
1950 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1951 else
1952 ctxt->name = NULL;
1953 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001954 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001955 return (ret);
1956}
Owen Taylor3473f882001-02-23 17:55:21 +00001957
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001958static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001959 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001960 int *tmp;
1961
Owen Taylor3473f882001-02-23 17:55:21 +00001962 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001963 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1964 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1965 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001966 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001967 ctxt->spaceMax /=2;
1968 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001969 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001970 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001971 }
1972 ctxt->spaceTab[ctxt->spaceNr] = val;
1973 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1974 return(ctxt->spaceNr++);
1975}
1976
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001977static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001978 int ret;
1979 if (ctxt->spaceNr <= 0) return(0);
1980 ctxt->spaceNr--;
1981 if (ctxt->spaceNr > 0)
1982 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1983 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001984 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001985 ret = ctxt->spaceTab[ctxt->spaceNr];
1986 ctxt->spaceTab[ctxt->spaceNr] = -1;
1987 return(ret);
1988}
1989
1990/*
1991 * Macros for accessing the content. Those should be used only by the parser,
1992 * and not exported.
1993 *
1994 * Dirty macros, i.e. one often need to make assumption on the context to
1995 * use them
1996 *
1997 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1998 * To be used with extreme caution since operations consuming
1999 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
2004 * RAW same as CUR but in the input buffer, bypass any token
2005 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare against ASCII based substrings.
2008 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00002009 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00002012 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2013 *
2014 * NEXT Skip to the next character, this does the proper decoding
2015 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00002016 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00002017 * CUR_CHAR(l) returns the current unicode character (int), set l
2018 * to the number of xmlChars used for the encoding [0-5].
2019 * CUR_SCHAR same but operate on a string instead of the context
2020 * COPY_BUF copy the current unicode char to the target buffer, increment
2021 * the index
2022 * GROW, SHRINK handling of input buffers
2023 */
2024
/*
 * Raw accessors on the current input of the parser context named `ctxt`
 * in the enclosing scope.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at @s are c1 ... cn.
 * The bytes are compared left to right and the comparison stops at the
 * first mismatch. @s is parenthesized so that any pointer expression can
 * be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
2048
/*
 * SKIP(val): advance the current input by @val xmlChars, adding @val to
 * the column and to ctxt->nbChars. The line counter is not touched, so
 * the skipped chars must not include a newline. More input is requested
 * when the new current char is 0.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): same as SKIP but goes one xmlChar at a time to keep line
 * and column up to date across newlines. @val is evaluated at each
 * iteration; it is parenthesized so that any expression can be passed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2067
Daniel Veillarda880b122003-04-21 21:36:41 +00002068#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002069 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2070 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002071 xmlSHRINK (ctxt);
2072
2073static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2074 xmlParserInputShrink(ctxt->input);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002075 if (*ctxt->input->cur == 0)
2076 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2077}
Owen Taylor3473f882001-02-23 17:55:21 +00002078
Daniel Veillarda880b122003-04-21 21:36:41 +00002079#define GROW if ((ctxt->progressive == 0) && \
2080 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002081 xmlGROW (ctxt);
2082
2083static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002084 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2085 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2086
2087 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2088 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002089 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002090 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2091 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002092 xmlHaltParser(ctxt);
2093 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002094 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002095 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002096 if ((ctxt->input->cur > ctxt->input->end) ||
2097 (ctxt->input->cur < ctxt->input->base)) {
2098 xmlHaltParser(ctxt);
2099 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2100 return;
2101 }
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002102 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2103 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillarda880b122003-04-21 21:36:41 +00002104}
Owen Taylor3473f882001-02-23 17:55:21 +00002105
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar, bumping column and ctxt->nbChars but
 * not the line counter; more input is requested when the new current
 * char is 0. Expands to a braced block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is @l xmlChars long,
 * updating line and column according to its first xmlChar.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character @v to buffer @b at index @i and
 * advance @i; a plain store is used when @l is 1, otherwise
 * xmlCopyCharMultiByte() writes it.
 * NOTE: expands to an unbraced if/else statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[i],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002131
2132/**
2133 * xmlSkipBlankChars:
2134 * @ctxt: the XML parser context
2135 *
2136 * skip all blanks character found at that point in the input streams.
2137 * It pops up finished entities in the process if allowable at that point.
2138 *
2139 * Returns the number of space chars skipped
2140 */
2141
2142int
2143xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002144 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002145
2146 /*
2147 * It's Okay to use CUR/NEXT here since all the blanks are on
2148 * the ASCII range.
2149 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002150 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2151 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002152 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002153 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002154 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002155 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002156 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002157 if (*cur == '\n') {
2158 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002159 } else {
2160 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002161 }
2162 cur++;
2163 res++;
2164 if (*cur == 0) {
2165 ctxt->input->cur = cur;
2166 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2167 cur = ctxt->input->cur;
2168 }
2169 }
2170 ctxt->input->cur = cur;
2171 } else {
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002172 int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2173
2174 while (1) {
2175 if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002176 NEXT;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002177 } else if (CUR == '%') {
2178 /*
2179 * Need to handle support of entities branching here
2180 */
2181 if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2182 break;
2183 xmlParsePEReference(ctxt);
2184 } else if (CUR == 0) {
2185 if (ctxt->inputNr <= 1)
2186 break;
2187 xmlPopInput(ctxt);
2188 } else {
2189 break;
2190 }
Nick Wellnhofer872fea92017-06-19 00:24:12 +02002191
2192 /*
2193 * Also increase the counter when entering or exiting a PERef.
2194 * The spec says: "When a parameter-entity reference is recognized
2195 * in the DTD and included, its replacement text MUST be enlarged
2196 * by the attachment of one leading and one following space (#x20)
2197 * character."
2198 */
2199 res++;
Nick Wellnhoferaa267cd2017-06-18 23:29:51 +02002200 }
Daniel Veillard02141ea2001-04-30 11:46:40 +00002201 }
Owen Taylor3473f882001-02-23 17:55:21 +00002202 return(res);
2203}
2204
2205/************************************************************************
2206 * *
2207 * Commodity functions to handle entities *
2208 * *
2209 ************************************************************************/
2210
2211/**
2212 * xmlPopInput:
2213 * @ctxt: an XML parser context
2214 *
2215 * xmlPopInput: the current input pointed by ctxt->input came to an end
2216 * pop it and return the next char.
2217 *
2218 * Returns the current xmlChar in the parser context
2219 */
2220xmlChar
2221xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002222 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002223 if (xmlParserDebugEntities)
2224 xmlGenericError(xmlGenericErrorContext,
2225 "Popping input %d\n", ctxt->inputNr);
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02002226 if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2227 (ctxt->instate != XML_PARSER_EOF))
2228 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2229 "Unfinished entity outside the DTD");
Owen Taylor3473f882001-02-23 17:55:21 +00002230 xmlFreeInputStream(inputPop(ctxt));
Nick Wellnhofer453dff12017-06-19 17:55:20 +02002231 if (*ctxt->input->cur == 0)
2232 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Owen Taylor3473f882001-02-23 17:55:21 +00002233 return(CUR);
2234}
2235
2236/**
2237 * xmlPushInput:
2238 * @ctxt: an XML parser context
2239 * @input: an XML parser input fragment (entity, XML fragment ...).
2240 *
2241 * xmlPushInput: switch to a new input stream which is stacked on top
2242 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002243 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002244 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002245int
Owen Taylor3473f882001-02-23 17:55:21 +00002246xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002247 int ret;
2248 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002249
2250 if (xmlParserDebugEntities) {
2251 if ((ctxt->input != NULL) && (ctxt->input->filename))
2252 xmlGenericError(xmlGenericErrorContext,
2253 "%s(%d): ", ctxt->input->filename,
2254 ctxt->input->line);
2255 xmlGenericError(xmlGenericErrorContext,
2256 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2257 }
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02002258 if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2259 (ctxt->inputNr > 1024)) {
2260 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2261 while (ctxt->inputNr > 1)
2262 xmlFreeInputStream(inputPop(ctxt));
2263 return(-1);
2264 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002265 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002266 if (ctxt->instate == XML_PARSER_EOF)
2267 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002268 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002269 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002270}
2271
2272/**
2273 * xmlParseCharRef:
2274 * @ctxt: an XML parser context
2275 *
2276 * parse Reference declarations
2277 *
2278 * [66] CharRef ::= '&#' [0-9]+ ';' |
2279 * '&#x' [0-9a-fA-F]+ ';'
2280 *
2281 * [ WFC: Legal Character ]
2282 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002283 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002284 *
2285 * Returns the value parsed (as an int), 0 in case of error
2286 */
2287int
2288xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002289 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002290 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002291 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002292
Owen Taylor3473f882001-02-23 17:55:21 +00002293 /*
2294 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2295 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002296 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002297 (NXT(2) == 'x')) {
2298 SKIP(3);
2299 GROW;
2300 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002301 if (count++ > 20) {
2302 count = 0;
2303 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002304 if (ctxt->instate == XML_PARSER_EOF)
2305 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002306 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002307 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002308 val = val * 16 + (CUR - '0');
2309 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2310 val = val * 16 + (CUR - 'a') + 10;
2311 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2312 val = val * 16 + (CUR - 'A') + 10;
2313 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002314 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002315 val = 0;
2316 break;
2317 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002318 if (val > 0x10FFFF)
2319 outofrange = val;
2320
Owen Taylor3473f882001-02-23 17:55:21 +00002321 NEXT;
2322 count++;
2323 }
2324 if (RAW == ';') {
2325 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002326 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002327 ctxt->nbChars ++;
2328 ctxt->input->cur++;
2329 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002330 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002331 SKIP(2);
2332 GROW;
2333 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002334 if (count++ > 20) {
2335 count = 0;
2336 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002337 if (ctxt->instate == XML_PARSER_EOF)
2338 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002339 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002340 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002341 val = val * 10 + (CUR - '0');
2342 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002343 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002344 val = 0;
2345 break;
2346 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002347 if (val > 0x10FFFF)
2348 outofrange = val;
2349
Owen Taylor3473f882001-02-23 17:55:21 +00002350 NEXT;
2351 count++;
2352 }
2353 if (RAW == ';') {
2354 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002355 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002356 ctxt->nbChars ++;
2357 ctxt->input->cur++;
2358 }
2359 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 }
2362
2363 /*
2364 * [ WFC: Legal Character ]
2365 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002366 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002367 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002368 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002369 return(val);
2370 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002371 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372 "xmlParseCharRef: invalid xmlChar value %d\n",
2373 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002374 }
2375 return(0);
2376}
2377
2378/**
2379 * xmlParseStringCharRef:
2380 * @ctxt: an XML parser context
2381 * @str: a pointer to an index in the string
2382 *
2383 * parse Reference declarations, variant parsing from a string rather
2384 * than an an input flow.
2385 *
2386 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387 * '&#x' [0-9a-fA-F]+ ';'
2388 *
2389 * [ WFC: Legal Character ]
2390 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002391 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002392 *
2393 * Returns the value parsed (as an int), 0 in case of error, str will be
2394 * updated to the current value of the index
2395 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002396static int
Owen Taylor3473f882001-02-23 17:55:21 +00002397xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2398 const xmlChar *ptr;
2399 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002400 unsigned int val = 0;
2401 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002402
2403 if ((str == NULL) || (*str == NULL)) return(0);
2404 ptr = *str;
2405 cur = *ptr;
2406 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2407 ptr += 3;
2408 cur = *ptr;
2409 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002410 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002411 val = val * 16 + (cur - '0');
2412 else if ((cur >= 'a') && (cur <= 'f'))
2413 val = val * 16 + (cur - 'a') + 10;
2414 else if ((cur >= 'A') && (cur <= 'F'))
2415 val = val * 16 + (cur - 'A') + 10;
2416 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002417 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002418 val = 0;
2419 break;
2420 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002421 if (val > 0x10FFFF)
2422 outofrange = val;
2423
Owen Taylor3473f882001-02-23 17:55:21 +00002424 ptr++;
2425 cur = *ptr;
2426 }
2427 if (cur == ';')
2428 ptr++;
2429 } else if ((cur == '&') && (ptr[1] == '#')){
2430 ptr += 2;
2431 cur = *ptr;
2432 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002433 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002434 val = val * 10 + (cur - '0');
2435 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002436 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002437 val = 0;
2438 break;
2439 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002440 if (val > 0x10FFFF)
2441 outofrange = val;
2442
Owen Taylor3473f882001-02-23 17:55:21 +00002443 ptr++;
2444 cur = *ptr;
2445 }
2446 if (cur == ';')
2447 ptr++;
2448 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002449 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002450 return(0);
2451 }
2452 *str = ptr;
2453
2454 /*
2455 * [ WFC: Legal Character ]
2456 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002457 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002458 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002459 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002460 return(val);
2461 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002462 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2463 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2464 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002465 }
2466 return(0);
2467}
2468
2469/**
2470 * xmlParserHandlePEReference:
2471 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002472 *
Owen Taylor3473f882001-02-23 17:55:21 +00002473 * [69] PEReference ::= '%' Name ';'
2474 *
2475 * [ WFC: No Recursion ]
2476 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002477 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002478 *
2479 * [ WFC: Entity Declared ]
2480 * In a document without any DTD, a document with only an internal DTD
2481 * subset which contains no parameter entity references, or a document
2482 * with "standalone='yes'", ... ... The declaration of a parameter
2483 * entity must precede any reference to it...
2484 *
2485 * [ VC: Entity Declared ]
2486 * In a document with an external subset or external parameter entities
2487 * with "standalone='no'", ... ... The declaration of a parameter entity
2488 * must precede any reference to it...
2489 *
2490 * [ WFC: In DTD ]
2491 * Parameter-entity references may only appear in the DTD.
2492 * NOTE: misleading but this is handled.
2493 *
2494 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002495 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002496 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002497 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002498 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002499 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002500 */
2501void
2502xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00002503 switch(ctxt->instate) {
2504 case XML_PARSER_CDATA_SECTION:
2505 return;
2506 case XML_PARSER_COMMENT:
2507 return;
2508 case XML_PARSER_START_TAG:
2509 return;
2510 case XML_PARSER_END_TAG:
2511 return;
2512 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002513 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002514 return;
2515 case XML_PARSER_PROLOG:
2516 case XML_PARSER_START:
2517 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002518 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002519 return;
2520 case XML_PARSER_ENTITY_DECL:
2521 case XML_PARSER_CONTENT:
2522 case XML_PARSER_ATTRIBUTE_VALUE:
2523 case XML_PARSER_PI:
2524 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002525 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002526 /* we just ignore it there */
2527 return;
2528 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002529 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002530 return;
2531 case XML_PARSER_ENTITY_VALUE:
2532 /*
2533 * NOTE: in the case of entity values, we don't do the
2534 * substitution here since we need the literal
2535 * entity value to be able to save the internal
2536 * subset of the document.
2537 * This will be handled by xmlStringDecodeEntities
2538 */
2539 return;
2540 case XML_PARSER_DTD:
2541 /*
2542 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2543 * In the internal DTD subset, parameter-entity references
2544 * can occur only where markup declarations can occur, not
2545 * within markup declarations.
2546 * In that case this is handled in xmlParseMarkupDecl
2547 */
2548 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2549 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002550 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002551 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002552 break;
2553 case XML_PARSER_IGNORE:
2554 return;
2555 }
2556
Nick Wellnhofer03904152017-06-05 21:16:00 +02002557 xmlParsePEReference(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00002558}
2559
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current size plus n. If that computation
 * wraps around, or if xmlRealloc() fails, control jumps to the mem_error
 * label of the enclosing function with buffer and buffer##_size left
 * unchanged; otherwise both are updated to the reallocated block.
 * NOTE: expands to a bare { } block and uses n unparenthesized.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + n;				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2574
/**
 * xmlStringLenDecodeEntities:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 *
 * Takes an entity string content and processes it to do the adequate
 * substitutions. Decoding stops at the first 0 character, at the first
 * character equal to one of the end markers, or once @len bytes of @str
 * have been consumed.
 *
 * Character references ("&#...;") are always decoded. General entity
 * references are substituted only when @what contains XML_SUBSTITUTE_REF
 * and parameter-entity references only when it contains
 * XML_SUBSTITUTE_PEREF; entity contents are decoded recursively.
 *
 * [67] Reference ::= EntityRef | CharRef
 *
 * [69] PEReference ::= '%' Name ';'
 *
 * Returns A newly allocated string with the substitution done. The caller
 *      must deallocate it ! NULL is returned on invalid arguments
 *      (NULL @ctxt or @str, negative @len), when the nesting depth limit
 *      is exceeded, on allocation failure and on decoding errors.
 */
xmlChar *
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
                           int what, xmlChar end, xmlChar  end2, xmlChar end3) {
    xmlChar *buffer = NULL;     /* output buffer, returned on success */
    size_t buffer_size = 0;     /* allocated size of buffer */
    size_t nbchars = 0;         /* number of bytes written to buffer */

    xmlChar *current = NULL;    /* read cursor inside rep */
    xmlChar *rep = NULL;        /* decoded content of a nested entity */
    const xmlChar *last;        /* one past the last byte of the input */
    xmlEntityPtr ent;
    int c,l;                    /* current character and its byte length */

    if ((ctxt == NULL) || (str == NULL) || (len < 0))
        return(NULL);
    last = str + len;

    /*
     * Recursion guard: ctxt->depth is incremented around the nested
     * xmlStringDecodeEntities() calls below.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
        (ctxt->depth > 1024)) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
        return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     */
    if (str < last)
        c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3)) {

        if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
            /* Character reference: decoded regardless of @what. */
            int val = xmlParseStringCharRef(ctxt, &str);
            if (val == 0)
                goto int_error;
            /* l == 0 forces the multi-byte copy path of COPY_BUF. */
            COPY_BUF(0,buffer,nbchars,val);
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
            /* General entity reference. */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding Entity Reference: %.30s\n",
                        str);
            ent = xmlParseStringEntityRef(ctxt, &str);
            /* NOTE(review): the result of this check is ignored here. */
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if ((ent != NULL) &&
                (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                /* Predefined entity: only the first content char is copied. */
                if (ent->content != NULL) {
                    COPY_BUF(0,buffer,nbchars,ent->content[0]);
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                } else {
                    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
                            "predefined entity has no content\n");
                    goto int_error;
                }
            } else if ((ent != NULL) && (ent->content != NULL)) {
                /* Entity with content: decode it recursively and append. */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL)
                    goto int_error;

                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* Re-run the entity check before every growth. */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            } else if (ent != NULL) {
                /* Entity without content: emit the reference as "&name;". */
                int i = xmlStrlen(ent->name);
                const xmlChar *cur = ent->name;

                buffer[nbchars++] = '&';
                if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
                    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
                }
                for (;i > 0;i--)
                    buffer[nbchars++] = *cur++;
                buffer[nbchars++] = ';';
            }
        } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
            /* Parameter-entity reference. */
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
                        "String decoding PE Reference: %.30s\n", str);
            ent = xmlParseStringPEReference(ctxt, &str);
            /* NOTE(review): the result of this check is ignored here. */
            xmlParserEntityCheck(ctxt, 0, ent, 0);
            if (ent != NULL)
                ctxt->nbentities += ent->checked / 2;
            if (ent != NULL) {
                if (ent->content == NULL) {
                    /*
                     * Note: external parsed entities will not be loaded,
                     * it is not required for a non-validating parser to
                     * complete external PEreferences coming from the
                     * internal subset
                     */
                    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
                        ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
                        (ctxt->validate != 0)) {
                        xmlLoadEntityContent(ctxt, ent);
                    } else {
                        xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
                  "not validating will not read content for PE entity %s\n",
                                      ent->name, NULL);
                    }
                }
                /*
                 * ent->content may still be NULL here; the nested call then
                 * returns NULL and decoding stops through int_error.
                 */
                ctxt->depth++;
                rep = xmlStringDecodeEntities(ctxt, ent->content, what,
                                              0, 0, 0);
                ctxt->depth--;
                if (rep == NULL)
                    goto int_error;
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        /* Re-run the entity check before every growth. */
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
                            goto int_error;
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
            }
        } else {
            /* Ordinary character: copy it and advance by its byte length. */
            COPY_BUF(l,buffer,nbchars,c);
            str += l;
            if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
            }
        }
        if (str < last)
            c = CUR_SCHAR(str, l);
        else
            c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* mem_error reports the allocation failure, then shares the cleanup. */
mem_error:
    xmlErrMemory(ctxt, NULL);
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2765
Daniel Veillarde57ec792003-09-10 10:50:59 +00002766/**
2767 * xmlStringDecodeEntities:
2768 * @ctxt: the parser context
2769 * @str: the input string
2770 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2771 * @end: an end marker xmlChar, 0 if none
2772 * @end2: an end marker xmlChar, 0 if none
2773 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002774 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002775 * Takes a entity string content and process to do the adequate substitutions.
2776 *
2777 * [67] Reference ::= EntityRef | CharRef
2778 *
2779 * [69] PEReference ::= '%' Name ';'
2780 *
2781 * Returns A newly allocated string with the substitution done. The caller
2782 * must deallocate it !
2783 */
2784xmlChar *
2785xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2786 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002787 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002788 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2789 end, end2, end3));
2790}
Owen Taylor3473f882001-02-23 17:55:21 +00002791
2792/************************************************************************
2793 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002794 * Commodity functions, cleanup needed ? *
2795 * *
2796 ************************************************************************/
2797
2798/**
2799 * areBlanks:
2800 * @ctxt: an XML parser context
2801 * @str: a xmlChar *
2802 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002803 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002804 *
2805 * Is this a sequence of blank chars that one can ignore ?
2806 *
2807 * Returns 1 if ignorable 0 otherwise.
2808 */
2809
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002810static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2811 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002812 int i, ret;
2813 xmlNodePtr lastChild;
2814
Daniel Veillard05c13a22001-09-09 08:38:09 +00002815 /*
2816 * Don't spend time trying to differentiate them, the same callback is
2817 * used !
2818 */
2819 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002820 return(0);
2821
Owen Taylor3473f882001-02-23 17:55:21 +00002822 /*
2823 * Check for xml:space value.
2824 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002825 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2826 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002827 return(0);
2828
2829 /*
2830 * Check that the string is made of blanks
2831 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002832 if (blank_chars == 0) {
2833 for (i = 0;i < len;i++)
2834 if (!(IS_BLANK_CH(str[i]))) return(0);
2835 }
Owen Taylor3473f882001-02-23 17:55:21 +00002836
2837 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002838 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002839 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002840 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002841 if (ctxt->myDoc != NULL) {
2842 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2843 if (ret == 0) return(1);
2844 if (ret == 1) return(0);
2845 }
2846
2847 /*
2848 * Otherwise, heuristic :-\
2849 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002850 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002851 if ((ctxt->node->children == NULL) &&
2852 (RAW == '<') && (NXT(1) == '/')) return(0);
2853
2854 lastChild = xmlGetLastChild(ctxt->node);
2855 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002856 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2857 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002858 } else if (xmlNodeIsText(lastChild))
2859 return(0);
2860 else if ((ctxt->node->children != NULL) &&
2861 (xmlNodeIsText(ctxt->node->children)))
2862 return(0);
2863 return(1);
2864}
2865
Owen Taylor3473f882001-02-23 17:55:21 +00002866/************************************************************************
2867 * *
2868 * Extra stuff for namespace support *
2869 * Relates to http://www.w3.org/TR/WD-xml-names *
2870 * *
2871 ************************************************************************/
2872
2873/**
2874 * xmlSplitQName:
2875 * @ctxt: an XML parser context
2876 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002877 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002878 *
2879 * parse an UTF8 encoded XML qualified name string
2880 *
2881 * [NS 5] QName ::= (Prefix ':')? LocalPart
2882 *
2883 * [NS 6] Prefix ::= NCName
2884 *
2885 * [NS 7] LocalPart ::= NCName
2886 *
2887 * Returns the local part, and prefix is updated
2888 * to get the Prefix if any.
2889 */
2890
2891xmlChar *
2892xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2893 xmlChar buf[XML_MAX_NAMELEN + 5];
2894 xmlChar *buffer = NULL;
2895 int len = 0;
2896 int max = XML_MAX_NAMELEN;
2897 xmlChar *ret = NULL;
2898 const xmlChar *cur = name;
2899 int c;
2900
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002901 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002902 *prefix = NULL;
2903
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002904 if (cur == NULL) return(NULL);
2905
Owen Taylor3473f882001-02-23 17:55:21 +00002906#ifndef XML_XML_NAMESPACE
2907 /* xml: prefix is not really a namespace */
2908 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2909 (cur[2] == 'l') && (cur[3] == ':'))
2910 return(xmlStrdup(name));
2911#endif
2912
Daniel Veillard597bc482003-07-24 16:08:28 +00002913 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002914 if (cur[0] == ':')
2915 return(xmlStrdup(name));
2916
2917 c = *cur++;
2918 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2919 buf[len++] = c;
2920 c = *cur++;
2921 }
2922 if (len >= max) {
2923 /*
2924 * Okay someone managed to make a huge name, so he's ready to pay
2925 * for the processing speed.
2926 */
2927 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002928
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002929 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002930 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002931 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002932 return(NULL);
2933 }
2934 memcpy(buffer, buf, len);
2935 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2936 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002937 xmlChar *tmp;
2938
Owen Taylor3473f882001-02-23 17:55:21 +00002939 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002940 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002941 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002942 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002943 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002944 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002945 return(NULL);
2946 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002947 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002948 }
2949 buffer[len++] = c;
2950 c = *cur++;
2951 }
2952 buffer[len] = 0;
2953 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002954
Daniel Veillard597bc482003-07-24 16:08:28 +00002955 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002956 if (buffer != NULL)
2957 xmlFree(buffer);
2958 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002959 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002960 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002961
Owen Taylor3473f882001-02-23 17:55:21 +00002962 if (buffer == NULL)
2963 ret = xmlStrndup(buf, len);
2964 else {
2965 ret = buffer;
2966 buffer = NULL;
2967 max = XML_MAX_NAMELEN;
2968 }
2969
2970
2971 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002972 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002973 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002974 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002975 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002976 }
Owen Taylor3473f882001-02-23 17:55:21 +00002977 len = 0;
2978
Daniel Veillardbb284f42002-10-16 18:02:47 +00002979 /*
2980 * Check that the first character is proper to start
2981 * a new name
2982 */
2983 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2984 ((c >= 0x41) && (c <= 0x5A)) ||
2985 (c == '_') || (c == ':'))) {
2986 int l;
2987 int first = CUR_SCHAR(cur, l);
2988
2989 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002990 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002991 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002992 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002993 }
2994 }
2995 cur++;
2996
Owen Taylor3473f882001-02-23 17:55:21 +00002997 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2998 buf[len++] = c;
2999 c = *cur++;
3000 }
3001 if (len >= max) {
3002 /*
3003 * Okay someone managed to make a huge name, so he's ready to pay
3004 * for the processing speed.
3005 */
3006 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003007
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003008 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003009 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003010 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003011 return(NULL);
3012 }
3013 memcpy(buffer, buf, len);
3014 while (c != 0) { /* tested bigname2.xml */
3015 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003016 xmlChar *tmp;
3017
Owen Taylor3473f882001-02-23 17:55:21 +00003018 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003020 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003021 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003022 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003023 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003024 return(NULL);
3025 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003026 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003027 }
3028 buffer[len++] = c;
3029 c = *cur++;
3030 }
3031 buffer[len] = 0;
3032 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003033
Owen Taylor3473f882001-02-23 17:55:21 +00003034 if (buffer == NULL)
3035 ret = xmlStrndup(buf, len);
3036 else {
3037 ret = buffer;
3038 }
3039 }
3040
3041 return(ret);
3042}
3043
3044/************************************************************************
3045 * *
3046 * The parser itself *
3047 * Relates to http://www.w3.org/TR/REC-xml *
3048 * *
3049 ************************************************************************/
3050
Daniel Veillard34e3f642008-07-29 09:02:27 +00003051/************************************************************************
3052 * *
3053 * Routines to parse Name, NCName and NmToken *
3054 * *
3055 ************************************************************************/
#if defined(DEBUG)
/*
 * Call counters, one per name parsing routine of this section. Each
 * routine increments its own counter on entry; they only exist when
 * the file is compiled with DEBUG defined.
 */
/* generic Name parsing: fast path and complex path */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
/* NCName parsing: fast path and complex path */
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
/* string based Name parsing and Nmtoken parsing */
static unsigned long nbParseStringName = 0;
static unsigned long nbParseNmToken = 0;
#endif /* DEBUG */
3064
Daniel Veillard34e3f642008-07-29 09:02:27 +00003065/*
3066 * The two following functions are related to the change of accepted
3067 * characters for Name and NmToken in the Revision 5 of XML-1.0
3068 * They correspond to the modified production [4] and the new production [4a]
3069 * changes in that revision. Also note that the macros used for the
3070 * productions Letter, Digit, CombiningChar and Extender are not needed
3071 * anymore.
3072 * We still keep compatibility to pre-revision5 parsing semantic if the
3073 * new XML_PARSE_OLD10 option is given to the parser.
3074 */
3075static int
3076xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3077 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3078 /*
3079 * Use the new checks of production [4] [4a] amd [5] of the
3080 * Update 5 of XML-1.0
3081 */
3082 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3083 (((c >= 'a') && (c <= 'z')) ||
3084 ((c >= 'A') && (c <= 'Z')) ||
3085 (c == '_') || (c == ':') ||
3086 ((c >= 0xC0) && (c <= 0xD6)) ||
3087 ((c >= 0xD8) && (c <= 0xF6)) ||
3088 ((c >= 0xF8) && (c <= 0x2FF)) ||
3089 ((c >= 0x370) && (c <= 0x37D)) ||
3090 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3091 ((c >= 0x200C) && (c <= 0x200D)) ||
3092 ((c >= 0x2070) && (c <= 0x218F)) ||
3093 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3094 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3095 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3096 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3097 ((c >= 0x10000) && (c <= 0xEFFFF))))
3098 return(1);
3099 } else {
3100 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3101 return(1);
3102 }
3103 return(0);
3104}
3105
3106static int
3107xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3108 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3109 /*
3110 * Use the new checks of production [4] [4a] amd [5] of the
3111 * Update 5 of XML-1.0
3112 */
3113 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3114 (((c >= 'a') && (c <= 'z')) ||
3115 ((c >= 'A') && (c <= 'Z')) ||
3116 ((c >= '0') && (c <= '9')) || /* !start */
3117 (c == '_') || (c == ':') ||
3118 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3119 ((c >= 0xC0) && (c <= 0xD6)) ||
3120 ((c >= 0xD8) && (c <= 0xF6)) ||
3121 ((c >= 0xF8) && (c <= 0x2FF)) ||
3122 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3123 ((c >= 0x370) && (c <= 0x37D)) ||
3124 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3125 ((c >= 0x200C) && (c <= 0x200D)) ||
3126 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3127 ((c >= 0x2070) && (c <= 0x218F)) ||
3128 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3129 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3130 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3131 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3132 ((c >= 0x10000) && (c <= 0xEFFFF))))
3133 return(1);
3134 } else {
3135 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3136 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003137 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003138 (IS_COMBINING(c)) ||
3139 (IS_EXTENDER(c)))
3140 return(1);
3141 }
3142 return(0);
3143}
3144
Daniel Veillarde57ec792003-09-10 10:50:59 +00003145static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003146 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003147
Daniel Veillard34e3f642008-07-29 09:02:27 +00003148static const xmlChar *
3149xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3150 int len = 0, l;
3151 int c;
3152 int count = 0;
3153
Daniel Veillardc6561462009-03-25 10:22:31 +00003154#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003155 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003156#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003157
3158 /*
3159 * Handler for more complex cases
3160 */
3161 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003162 if (ctxt->instate == XML_PARSER_EOF)
3163 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003164 c = CUR_CHAR(l);
3165 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3166 /*
3167 * Use the new checks of production [4] [4a] amd [5] of the
3168 * Update 5 of XML-1.0
3169 */
3170 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3171 (!(((c >= 'a') && (c <= 'z')) ||
3172 ((c >= 'A') && (c <= 'Z')) ||
3173 (c == '_') || (c == ':') ||
3174 ((c >= 0xC0) && (c <= 0xD6)) ||
3175 ((c >= 0xD8) && (c <= 0xF6)) ||
3176 ((c >= 0xF8) && (c <= 0x2FF)) ||
3177 ((c >= 0x370) && (c <= 0x37D)) ||
3178 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179 ((c >= 0x200C) && (c <= 0x200D)) ||
3180 ((c >= 0x2070) && (c <= 0x218F)) ||
3181 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3182 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3183 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3184 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3185 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3186 return(NULL);
3187 }
3188 len += l;
3189 NEXTL(l);
3190 c = CUR_CHAR(l);
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3192 (((c >= 'a') && (c <= 'z')) ||
3193 ((c >= 'A') && (c <= 'Z')) ||
3194 ((c >= '0') && (c <= '9')) || /* !start */
3195 (c == '_') || (c == ':') ||
3196 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3197 ((c >= 0xC0) && (c <= 0xD6)) ||
3198 ((c >= 0xD8) && (c <= 0xF6)) ||
3199 ((c >= 0xF8) && (c <= 0x2FF)) ||
3200 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3201 ((c >= 0x370) && (c <= 0x37D)) ||
3202 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3203 ((c >= 0x200C) && (c <= 0x200D)) ||
3204 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3205 ((c >= 0x2070) && (c <= 0x218F)) ||
3206 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3207 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3208 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3209 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3210 ((c >= 0x10000) && (c <= 0xEFFFF))
3211 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003212 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003213 count = 0;
3214 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003215 if (ctxt->instate == XML_PARSER_EOF)
3216 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003217 }
3218 len += l;
3219 NEXTL(l);
3220 c = CUR_CHAR(l);
3221 }
3222 } else {
3223 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3224 (!IS_LETTER(c) && (c != '_') &&
3225 (c != ':'))) {
3226 return(NULL);
3227 }
3228 len += l;
3229 NEXTL(l);
3230 c = CUR_CHAR(l);
3231
3232 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3233 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3234 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003235 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003236 (IS_COMBINING(c)) ||
3237 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003238 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003239 count = 0;
3240 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003241 if (ctxt->instate == XML_PARSER_EOF)
3242 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003243 }
3244 len += l;
3245 NEXTL(l);
3246 c = CUR_CHAR(l);
3247 }
3248 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003249 if ((len > XML_MAX_NAME_LENGTH) &&
3250 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3251 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3252 return(NULL);
3253 }
Nick Wellnhofere2663052017-06-05 15:37:17 +02003254 if (ctxt->input->cur - ctxt->input->base < len) {
3255 /*
3256 * There were a couple of bugs where PERefs lead to to a change
3257 * of the buffer. Check the buffer size to avoid passing an invalid
3258 * pointer to xmlDictLookup.
3259 */
3260 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3261 "unexpected change of input buffer");
3262 return (NULL);
3263 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003264 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3265 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3266 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3267}
3268
Owen Taylor3473f882001-02-23 17:55:21 +00003269/**
3270 * xmlParseName:
3271 * @ctxt: an XML parser context
3272 *
3273 * parse an XML name.
3274 *
3275 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3276 * CombiningChar | Extender
3277 *
3278 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3279 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003280 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003281 *
3282 * Returns the Name parsed or NULL
3283 */
3284
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003285const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003286xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003287 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003288 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003289 int count = 0;
3290
3291 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003292
Daniel Veillardc6561462009-03-25 10:22:31 +00003293#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003294 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003295#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003296
Daniel Veillard48b2f892001-02-25 16:11:03 +00003297 /*
3298 * Accelerator for simple ASCII names
3299 */
3300 in = ctxt->input->cur;
3301 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3302 ((*in >= 0x41) && (*in <= 0x5A)) ||
3303 (*in == '_') || (*in == ':')) {
3304 in++;
3305 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3306 ((*in >= 0x41) && (*in <= 0x5A)) ||
3307 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003308 (*in == '_') || (*in == '-') ||
3309 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003310 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003311 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003312 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003313 if ((count > XML_MAX_NAME_LENGTH) &&
3314 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3315 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3316 return(NULL);
3317 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003318 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003319 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003320 ctxt->nbChars += count;
3321 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003322 if (ret == NULL)
3323 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003324 return(ret);
3325 }
3326 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003328 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003329}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003330
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331static const xmlChar *
3332xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3333 int len = 0, l;
3334 int c;
3335 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003336 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003337
Daniel Veillardc6561462009-03-25 10:22:31 +00003338#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003339 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003340#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003341
3342 /*
3343 * Handler for more complex cases
3344 */
3345 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003346 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003347 c = CUR_CHAR(l);
3348 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3349 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3350 return(NULL);
3351 }
3352
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3354 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003355 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003356 if ((len > XML_MAX_NAME_LENGTH) &&
3357 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3358 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3359 return(NULL);
3360 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003361 count = 0;
3362 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003363 if (ctxt->instate == XML_PARSER_EOF)
3364 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003365 }
3366 len += l;
3367 NEXTL(l);
3368 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003369 if (c == 0) {
3370 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003371 /*
3372 * when shrinking to extend the buffer we really need to preserve
3373 * the part of the name we already parsed. Hence rolling back
3374 * by current lenght.
3375 */
3376 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003377 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003378 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003379 if (ctxt->instate == XML_PARSER_EOF)
3380 return(NULL);
3381 c = CUR_CHAR(l);
3382 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003384 if ((len > XML_MAX_NAME_LENGTH) &&
3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3387 return(NULL);
3388 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003389 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003390}
3391
3392/**
3393 * xmlParseNCName:
3394 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003395 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003396 *
3397 * parse an XML name.
3398 *
3399 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3400 * CombiningChar | Extender
3401 *
3402 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3403 *
3404 * Returns the Name parsed or NULL
3405 */
3406
3407static const xmlChar *
3408xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003409 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003410 const xmlChar *ret;
3411 int count = 0;
3412
Daniel Veillardc6561462009-03-25 10:22:31 +00003413#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003414 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003415#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003416
3417 /*
3418 * Accelerator for simple ASCII names
3419 */
3420 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003421 e = ctxt->input->end;
3422 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3423 ((*in >= 0x41) && (*in <= 0x5A)) ||
3424 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003425 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003426 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3427 ((*in >= 0x41) && (*in <= 0x5A)) ||
3428 ((*in >= 0x30) && (*in <= 0x39)) ||
3429 (*in == '_') || (*in == '-') ||
3430 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003431 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003432 if (in >= e)
3433 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003434 if ((*in > 0) && (*in < 0x80)) {
3435 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003436 if ((count > XML_MAX_NAME_LENGTH) &&
3437 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3438 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3439 return(NULL);
3440 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003441 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3442 ctxt->input->cur = in;
3443 ctxt->nbChars += count;
3444 ctxt->input->col += count;
3445 if (ret == NULL) {
3446 xmlErrMemory(ctxt, NULL);
3447 }
3448 return(ret);
3449 }
3450 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003451complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003452 return(xmlParseNCNameComplex(ctxt));
3453}
3454
Daniel Veillard46de64e2002-05-29 08:21:33 +00003455/**
3456 * xmlParseNameAndCompare:
3457 * @ctxt: an XML parser context
3458 *
3459 * parse an XML name and compares for match
3460 * (specialized for endtag parsing)
3461 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003462 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3463 * and the name for mismatch
3464 */
3465
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003466static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003467xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003468 register const xmlChar *cmp = other;
3469 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003470 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003471
3472 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003473 if (ctxt->instate == XML_PARSER_EOF)
3474 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003475
Daniel Veillard46de64e2002-05-29 08:21:33 +00003476 in = ctxt->input->cur;
3477 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003479 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003480 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003481 }
William M. Brack76e95df2003-10-18 16:20:14 +00003482 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003483 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003484 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003485 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003486 }
3487 /* failure (or end of input buffer), check with full function */
3488 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003489 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003490 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003491 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003492 }
3493 return ret;
3494}
3495
Owen Taylor3473f882001-02-23 17:55:21 +00003496/**
3497 * xmlParseStringName:
3498 * @ctxt: an XML parser context
3499 * @str: a pointer to the string pointer (IN/OUT)
3500 *
3501 * parse an XML name.
3502 *
3503 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3504 * CombiningChar | Extender
3505 *
3506 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3507 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003508 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003509 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003510 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003511 * is updated to the current location in the string.
3512 */
3513
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003514static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003515xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3516 xmlChar buf[XML_MAX_NAMELEN + 5];
3517 const xmlChar *cur = *str;
3518 int len = 0, l;
3519 int c;
3520
Daniel Veillardc6561462009-03-25 10:22:31 +00003521#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003522 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003523#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003524
Owen Taylor3473f882001-02-23 17:55:21 +00003525 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003526 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003527 return(NULL);
3528 }
3529
Daniel Veillard34e3f642008-07-29 09:02:27 +00003530 COPY_BUF(l,buf,len,c);
3531 cur += l;
3532 c = CUR_SCHAR(cur, l);
3533 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003534 COPY_BUF(l,buf,len,c);
3535 cur += l;
3536 c = CUR_SCHAR(cur, l);
3537 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3538 /*
3539 * Okay someone managed to make a huge name, so he's ready to pay
3540 * for the processing speed.
3541 */
3542 xmlChar *buffer;
3543 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003544
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003545 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003546 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003547 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003548 return(NULL);
3549 }
3550 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003551 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003552 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003553 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003554
3555 if ((len > XML_MAX_NAME_LENGTH) &&
3556 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3557 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3558 xmlFree(buffer);
3559 return(NULL);
3560 }
Owen Taylor3473f882001-02-23 17:55:21 +00003561 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003562 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003563 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003564 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003565 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003566 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003567 return(NULL);
3568 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003569 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003570 }
3571 COPY_BUF(l,buffer,len,c);
3572 cur += l;
3573 c = CUR_SCHAR(cur, l);
3574 }
3575 buffer[len] = 0;
3576 *str = cur;
3577 return(buffer);
3578 }
3579 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003580 if ((len > XML_MAX_NAME_LENGTH) &&
3581 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3582 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3583 return(NULL);
3584 }
Owen Taylor3473f882001-02-23 17:55:21 +00003585 *str = cur;
3586 return(xmlStrndup(buf, len));
3587}
3588
3589/**
3590 * xmlParseNmtoken:
3591 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003592 *
Owen Taylor3473f882001-02-23 17:55:21 +00003593 * parse an XML Nmtoken.
3594 *
3595 * [7] Nmtoken ::= (NameChar)+
3596 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003597 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003598 *
3599 * Returns the Nmtoken parsed or NULL
3600 */
3601
3602xmlChar *
3603xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3604 xmlChar buf[XML_MAX_NAMELEN + 5];
3605 int len = 0, l;
3606 int c;
3607 int count = 0;
3608
Daniel Veillardc6561462009-03-25 10:22:31 +00003609#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003610 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003611#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003612
Owen Taylor3473f882001-02-23 17:55:21 +00003613 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003614 if (ctxt->instate == XML_PARSER_EOF)
3615 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003616 c = CUR_CHAR(l);
3617
Daniel Veillard34e3f642008-07-29 09:02:27 +00003618 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003619 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003620 count = 0;
3621 GROW;
3622 }
3623 COPY_BUF(l,buf,len,c);
3624 NEXTL(l);
3625 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003626 if (c == 0) {
3627 count = 0;
3628 GROW;
3629 if (ctxt->instate == XML_PARSER_EOF)
3630 return(NULL);
3631 c = CUR_CHAR(l);
3632 }
Owen Taylor3473f882001-02-23 17:55:21 +00003633 if (len >= XML_MAX_NAMELEN) {
3634 /*
3635 * Okay someone managed to make a huge token, so he's ready to pay
3636 * for the processing speed.
3637 */
3638 xmlChar *buffer;
3639 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003640
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003641 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003642 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003643 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003644 return(NULL);
3645 }
3646 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003647 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003648 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003649 count = 0;
3650 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003651 if (ctxt->instate == XML_PARSER_EOF) {
3652 xmlFree(buffer);
3653 return(NULL);
3654 }
Owen Taylor3473f882001-02-23 17:55:21 +00003655 }
3656 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003657 xmlChar *tmp;
3658
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003659 if ((max > XML_MAX_NAME_LENGTH) &&
3660 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3661 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3662 xmlFree(buffer);
3663 return(NULL);
3664 }
Owen Taylor3473f882001-02-23 17:55:21 +00003665 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003666 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003667 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003668 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003669 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003670 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003671 return(NULL);
3672 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003673 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003674 }
3675 COPY_BUF(l,buffer,len,c);
3676 NEXTL(l);
3677 c = CUR_CHAR(l);
3678 }
3679 buffer[len] = 0;
3680 return(buffer);
3681 }
3682 }
3683 if (len == 0)
3684 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003685 if ((len > XML_MAX_NAME_LENGTH) &&
3686 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3687 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3688 return(NULL);
3689 }
Owen Taylor3473f882001-02-23 17:55:21 +00003690 return(xmlStrndup(buf, len));
3691}
3692
3693/**
3694 * xmlParseEntityValue:
3695 * @ctxt: an XML parser context
3696 * @orig: if non-NULL store a copy of the original entity value
3697 *
3698 * parse a value for ENTITY declarations
3699 *
3700 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3701 * "'" ([^%&'] | PEReference | Reference)* "'"
3702 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003703 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003704 */
3705
3706xmlChar *
3707xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3708 xmlChar *buf = NULL;
3709 int len = 0;
3710 int size = XML_PARSER_BUFFER_SIZE;
3711 int c, l;
3712 xmlChar stop;
3713 xmlChar *ret = NULL;
3714 const xmlChar *cur = NULL;
3715 xmlParserInputPtr input;
3716
3717 if (RAW == '"') stop = '"';
3718 else if (RAW == '\'') stop = '\'';
3719 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003720 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003721 return(NULL);
3722 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003723 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003724 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003725 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003726 return(NULL);
3727 }
3728
3729 /*
3730 * The content of the entity definition is copied in a buffer.
3731 */
3732
3733 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3734 input = ctxt->input;
3735 GROW;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003736 if (ctxt->instate == XML_PARSER_EOF)
3737 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003738 NEXT;
3739 c = CUR_CHAR(l);
3740 /*
3741 * NOTE: 4.4.5 Included in Literal
3742 * When a parameter entity reference appears in a literal entity
3743 * value, ... a single or double quote character in the replacement
3744 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003745 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003746 * In practice it means we stop the loop only when back at parsing
3747 * the initial entity and the quote is found
3748 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003749 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3750 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003751 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003752 xmlChar *tmp;
3753
Owen Taylor3473f882001-02-23 17:55:21 +00003754 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003755 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3756 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003757 xmlErrMemory(ctxt, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003758 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003759 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003760 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003761 }
3762 COPY_BUF(l,buf,len,c);
3763 NEXTL(l);
Owen Taylor3473f882001-02-23 17:55:21 +00003764
3765 GROW;
3766 c = CUR_CHAR(l);
3767 if (c == 0) {
3768 GROW;
3769 c = CUR_CHAR(l);
3770 }
3771 }
3772 buf[len] = 0;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003773 if (ctxt->instate == XML_PARSER_EOF)
3774 goto error;
3775 if (c != stop) {
3776 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3777 goto error;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003778 }
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003779 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00003780
3781 /*
3782 * Raise problem w.r.t. '&' and '%' being used in non-entities
3783 * reference constructs. Note Charref will be handled in
3784 * xmlStringDecodeEntities()
3785 */
3786 cur = buf;
3787 while (*cur != 0) { /* non input consuming */
3788 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3789 xmlChar *name;
3790 xmlChar tmp = *cur;
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003791 int nameOk = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003792
3793 cur++;
3794 name = xmlParseStringName(ctxt, &cur);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003795 if (name != NULL) {
3796 nameOk = 1;
3797 xmlFree(name);
3798 }
3799 if ((nameOk == 0) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003800 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003801 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003802 tmp);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003803 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003804 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003805 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3806 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003807 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003808 goto error;
Owen Taylor3473f882001-02-23 17:55:21 +00003809 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003810 if (*cur == 0)
3811 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003812 }
3813 cur++;
3814 }
3815
3816 /*
3817 * Then PEReference entities are substituted.
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003818 *
3819 * NOTE: 4.4.7 Bypassed
3820 * When a general entity reference appears in the EntityValue in
3821 * an entity declaration, it is bypassed and left as is.
3822 * so XML_SUBSTITUTE_REF is not set here.
Owen Taylor3473f882001-02-23 17:55:21 +00003823 */
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003824 ++ctxt->depth;
3825 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3826 0, 0, 0);
3827 --ctxt->depth;
3828 if (orig != NULL) {
3829 *orig = buf;
3830 buf = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003831 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003832
Nick Wellnhofer0fcab652017-09-07 18:25:11 +02003833error:
3834 if (buf != NULL)
3835 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003836 return(ret);
3837}
3838
3839/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003840 * xmlParseAttValueComplex:
3841 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003842 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003843 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003844 *
3845 * parse a value for an attribute, this is the fallback function
3846 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003847 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003848 *
3849 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3850 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003851static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003852xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003853 xmlChar limit = 0;
3854 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003855 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003856 size_t len = 0;
3857 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003858 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003859 xmlChar *current = NULL;
3860 xmlEntityPtr ent;
3861
Owen Taylor3473f882001-02-23 17:55:21 +00003862 if (NXT(0) == '"') {
3863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3864 limit = '"';
3865 NEXT;
3866 } else if (NXT(0) == '\'') {
3867 limit = '\'';
3868 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3869 NEXT;
3870 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003871 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003872 return(NULL);
3873 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003874
Owen Taylor3473f882001-02-23 17:55:21 +00003875 /*
3876 * allocate a translation buffer.
3877 */
3878 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003879 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003880 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003881
3882 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003883 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003884 */
3885 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003886 while (((NXT(0) != limit) && /* checked */
3887 (IS_CHAR(c)) && (c != '<')) &&
3888 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003889 /*
3890 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3891 * special option is given
3892 */
3893 if ((len > XML_MAX_TEXT_LENGTH) &&
3894 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3895 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003896 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003897 goto mem_error;
3898 }
Owen Taylor3473f882001-02-23 17:55:21 +00003899 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003900 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003901 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003902 if (NXT(1) == '#') {
3903 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003904
Owen Taylor3473f882001-02-23 17:55:21 +00003905 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003906 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003907 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003908 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003909 }
3910 buf[len++] = '&';
3911 } else {
3912 /*
3913 * The reparsing will be done in xmlStringGetNodeList()
3914 * called by the attribute() function in SAX.c
3915 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003916 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003917 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003918 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003919 buf[len++] = '&';
3920 buf[len++] = '#';
3921 buf[len++] = '3';
3922 buf[len++] = '8';
3923 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003924 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003925 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003926 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003927 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003928 }
Owen Taylor3473f882001-02-23 17:55:21 +00003929 len += xmlCopyChar(0, &buf[len], val);
3930 }
3931 } else {
3932 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003933 ctxt->nbentities++;
3934 if (ent != NULL)
3935 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003936 if ((ent != NULL) &&
3937 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003938 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003939 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003940 }
3941 if ((ctxt->replaceEntities == 0) &&
3942 (ent->content[0] == '&')) {
3943 buf[len++] = '&';
3944 buf[len++] = '#';
3945 buf[len++] = '3';
3946 buf[len++] = '8';
3947 buf[len++] = ';';
3948 } else {
3949 buf[len++] = ent->content[0];
3950 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003951 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003952 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003953 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02003954 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003955 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003956 XML_SUBSTITUTE_REF,
3957 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003958 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003959 if (rep != NULL) {
3960 current = rep;
3961 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003962 if ((*current == 0xD) || (*current == 0xA) ||
3963 (*current == 0x9)) {
3964 buf[len++] = 0x20;
3965 current++;
3966 } else
3967 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003968 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003969 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 }
3971 }
3972 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003973 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003974 }
3975 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003976 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003977 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003978 }
Owen Taylor3473f882001-02-23 17:55:21 +00003979 if (ent->content != NULL)
3980 buf[len++] = ent->content[0];
3981 }
3982 } else if (ent != NULL) {
3983 int i = xmlStrlen(ent->name);
3984 const xmlChar *cur = ent->name;
3985
3986 /*
3987 * This may look absurd but is needed to detect
3988 * entities problems
3989 */
3990 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003991 (ent->content != NULL) && (ent->checked == 0)) {
3992 unsigned long oldnbent = ctxt->nbentities;
3993
Peter Simons8f30bdf2016-04-15 11:56:55 +02003994 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003995 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003996 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003997 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08003998
Daniel Veillardcff25462013-03-11 15:57:55 +08003999 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004000 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004001 if (xmlStrchr(rep, '<'))
4002 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004003 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004004 rep = NULL;
Nick Wellnhoferabbda932017-09-11 01:14:16 +02004005 } else {
4006 ent->content[0] = 0;
4007 }
Owen Taylor3473f882001-02-23 17:55:21 +00004008 }
4009
4010 /*
4011 * Just output the reference
4012 */
4013 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004014 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004015 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004016 }
4017 for (;i > 0;i--)
4018 buf[len++] = *cur++;
4019 buf[len++] = ';';
4020 }
4021 }
4022 } else {
4023 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004024 if ((len != 0) || (!normalize)) {
4025 if ((!normalize) || (!in_space)) {
4026 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004027 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004028 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004029 }
4030 }
4031 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 }
4033 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004034 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004035 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004036 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004037 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004038 }
4039 }
4040 NEXTL(l);
4041 }
4042 GROW;
4043 c = CUR_CHAR(l);
4044 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004045 if (ctxt->instate == XML_PARSER_EOF)
4046 goto error;
4047
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004048 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004049 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004050 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004051 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004052 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004053 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004054 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004055 if ((c != 0) && (!IS_CHAR(c))) {
4056 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4057 "invalid character in attribute value\n");
4058 } else {
4059 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4060 "AttValue: ' expected\n");
4061 }
Owen Taylor3473f882001-02-23 17:55:21 +00004062 } else
4063 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004064
4065 /*
4066 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004067 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004068 */
4069 if (len >= INT_MAX) {
4070 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004071 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004072 goto mem_error;
4073 }
4074
4075 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004076 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004077
4078mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004079 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004080error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004081 if (buf != NULL)
4082 xmlFree(buf);
4083 if (rep != NULL)
4084 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004085 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004086}
4087
4088/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004089 * xmlParseAttValue:
4090 * @ctxt: an XML parser context
4091 *
4092 * parse a value for an attribute
4093 * Note: the parser won't do substitution of entities here, this
4094 * will be handled later in xmlStringGetNodeList
4095 *
4096 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4097 * "'" ([^<&'] | Reference)* "'"
4098 *
4099 * 3.3.3 Attribute-Value Normalization:
4100 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004101 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004102 * - a character reference is processed by appending the referenced
4103 * character to the attribute value
4104 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004105 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004106 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4107 * appending #x20 to the normalized value, except that only a single
4108 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004109 * parsed entity or the literal entity value of an internal parsed entity
4110 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004111 * If the declared value is not CDATA, then the XML processor must further
4112 * process the normalized attribute value by discarding any leading and
4113 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004114 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004115 * All attributes for which no declaration has been read should be treated
4116 * by a non-validating parser as if declared CDATA.
4117 *
4118 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4119 */
4120
4121
4122xmlChar *
4123xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004124 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004125 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004126}
4127
4128/**
Owen Taylor3473f882001-02-23 17:55:21 +00004129 * xmlParseSystemLiteral:
4130 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004131 *
Owen Taylor3473f882001-02-23 17:55:21 +00004132 * parse an XML Literal
4133 *
4134 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4135 *
4136 * Returns the SystemLiteral parsed or NULL
4137 */
4138
4139xmlChar *
4140xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4141 xmlChar *buf = NULL;
4142 int len = 0;
4143 int size = XML_PARSER_BUFFER_SIZE;
4144 int cur, l;
4145 xmlChar stop;
4146 int state = ctxt->instate;
4147 int count = 0;
4148
4149 SHRINK;
4150 if (RAW == '"') {
4151 NEXT;
4152 stop = '"';
4153 } else if (RAW == '\'') {
4154 NEXT;
4155 stop = '\'';
4156 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004157 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004158 return(NULL);
4159 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004160
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004161 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004162 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004163 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004164 return(NULL);
4165 }
4166 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4167 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004168 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004169 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004170 xmlChar *tmp;
4171
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004172 if ((size > XML_MAX_NAME_LENGTH) &&
4173 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4174 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4175 xmlFree(buf);
4176 ctxt->instate = (xmlParserInputState) state;
4177 return(NULL);
4178 }
Owen Taylor3473f882001-02-23 17:55:21 +00004179 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004180 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4181 if (tmp == NULL) {
4182 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004183 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 ctxt->instate = (xmlParserInputState) state;
4185 return(NULL);
4186 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004187 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004188 }
4189 count++;
4190 if (count > 50) {
4191 GROW;
4192 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004193 if (ctxt->instate == XML_PARSER_EOF) {
4194 xmlFree(buf);
4195 return(NULL);
4196 }
Owen Taylor3473f882001-02-23 17:55:21 +00004197 }
4198 COPY_BUF(l,buf,len,cur);
4199 NEXTL(l);
4200 cur = CUR_CHAR(l);
4201 if (cur == 0) {
4202 GROW;
4203 SHRINK;
4204 cur = CUR_CHAR(l);
4205 }
4206 }
4207 buf[len] = 0;
4208 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004209 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004210 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004211 } else {
4212 NEXT;
4213 }
4214 return(buf);
4215}
4216
4217/**
4218 * xmlParsePubidLiteral:
4219 * @ctxt: an XML parser context
4220 *
4221 * parse an XML public literal
4222 *
4223 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4224 *
4225 * Returns the PubidLiteral parsed or NULL.
4226 */
4227
4228xmlChar *
4229xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4230 xmlChar *buf = NULL;
4231 int len = 0;
4232 int size = XML_PARSER_BUFFER_SIZE;
4233 xmlChar cur;
4234 xmlChar stop;
4235 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004236 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004237
4238 SHRINK;
4239 if (RAW == '"') {
4240 NEXT;
4241 stop = '"';
4242 } else if (RAW == '\'') {
4243 NEXT;
4244 stop = '\'';
4245 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004246 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004247 return(NULL);
4248 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004249 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004250 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004251 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004252 return(NULL);
4253 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004254 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004255 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004256 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004257 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004258 xmlChar *tmp;
4259
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004260 if ((size > XML_MAX_NAME_LENGTH) &&
4261 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4262 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4263 xmlFree(buf);
4264 return(NULL);
4265 }
Owen Taylor3473f882001-02-23 17:55:21 +00004266 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004267 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4268 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004269 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004270 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004271 return(NULL);
4272 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004273 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004274 }
4275 buf[len++] = cur;
4276 count++;
4277 if (count > 50) {
4278 GROW;
4279 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004280 if (ctxt->instate == XML_PARSER_EOF) {
4281 xmlFree(buf);
4282 return(NULL);
4283 }
Owen Taylor3473f882001-02-23 17:55:21 +00004284 }
4285 NEXT;
4286 cur = CUR;
4287 if (cur == 0) {
4288 GROW;
4289 SHRINK;
4290 cur = CUR;
4291 }
4292 }
4293 buf[len] = 0;
4294 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004295 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004296 } else {
4297 NEXT;
4298 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004299 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004300 return(buf);
4301}
4302
Daniel Veillard8ed10722009-08-20 19:17:36 +02004303static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004304
/*
 * Byte classification table used for the test in the inner loop of the
 * char data testing.
 *
 * An entry holds the byte value itself for TAB (0x09) and for the ASCII
 * range 0x20-0x7F except '&' (0x26), '<' (0x3C) and ']' (0x5D). Every
 * other entry is 0: the remaining control characters (CR and LF
 * included), those three characters and all non-ASCII bytes.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 */ 0, 0x09, 0, 0, 0, 0, 0, 0,   /* TAB kept; LF and CR are not */
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0, 0x27,   /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0, 0x3D, 0x3E, 0x3F,   /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0, 0x5E, 0x5F,   /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, all zero */
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
4342
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * Parse a CharData section.
 * If we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero an accelerated path scans the raw input bytes in
 * place and hands them to the SAX callbacks without copying.  As soon as
 * that path meets a byte it does not handle (or when @cdata is non-zero)
 * the work is delegated to xmlParseCharDataComplex().
 *
 * The function returns nothing; results are delivered through
 * ctxt->sax->characters / ctxt->sax->ignorableWhitespace and errors
 * through xmlFatalErr().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Snapshot of the position; refreshed after each SAX delivery in the
     * main scan below and written back to ctxt->input before falling back
     * to the complex routine.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a run of spaces and newlines, keeping col/line current. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * Only spaces/newlines were seen before a '<': deliver that
             * run (if any) and return.
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * Distinct handlers: let areBlanks() decide which
                         * one receives the run.
                         * NOTE(review): the last argument (1 here, 0 in
                         * the other calls) presumably flags a run already
                         * known to be blank - confirm against areBlanks().
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /*
                             * NOTE(review): meaning of the -1 / -2 values
                             * of *ctxt->space is not visible in this
                             * function - see where ctxt->space is managed.
                             */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Advance while the lookup table entry for the byte is
             * non-zero; a zero entry stops the scan (the table maps all
             * bytes >= 0x80 to zero).
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in plain content. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    /* Move cur one byte past the first ']' and give up. */
                    ctxt->input->cur = in + 1;
                    return;
                }
                /* A lone ']' is ordinary data: keep scanning. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver everything scanned since ctxt->input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The chunk starts with a blank and the handlers
                     * differ: areBlanks() picks the callback.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR LF: move cur onto the LF (so the CR is not part of
                 * the next chunk), count the new line and resume at the
                 * loop condition with 'in' just past the LF.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* Lone CR: step back and let the code below handle it. */
                in--;
            }
            /* Markup or a reference starts here: the caller takes over. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* SHRINK/GROW may have changed the input; reload the cursor. */
            in = ctxt->input->cur;
            /* Stay on the fast path only for printable ASCII or a tab. */
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Fall back to the generic routine, first resetting line/col to the
     * values recorded above.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4500
Daniel Veillard01c13b52002-12-10 15:19:08 +00004501/**
4502 * xmlParseCharDataComplex:
4503 * @ctxt: an XML parser context
4504 * @cdata: int indicating whether we are within a CDATA section
4505 *
4506 * parse a CharData section.this is the fallback function
4507 * of xmlParseCharData() when the parsing requires handling
4508 * of non-ASCII characters.
4509 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004510static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004511xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004512 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4513 int nbchar = 0;
4514 int cur, l;
4515 int count = 0;
4516
4517 SHRINK;
4518 GROW;
4519 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004520 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004521 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004522 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004523 if ((cur == ']') && (NXT(1) == ']') &&
4524 (NXT(2) == '>')) {
4525 if (cdata) break;
4526 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004527 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004528 }
4529 }
4530 COPY_BUF(l,buf,nbchar,cur);
4531 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004532 buf[nbchar] = 0;
4533
Owen Taylor3473f882001-02-23 17:55:21 +00004534 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004535 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004536 */
4537 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004538 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004539 if (ctxt->sax->ignorableWhitespace != NULL)
4540 ctxt->sax->ignorableWhitespace(ctxt->userData,
4541 buf, nbchar);
4542 } else {
4543 if (ctxt->sax->characters != NULL)
4544 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004545 if ((ctxt->sax->characters !=
4546 ctxt->sax->ignorableWhitespace) &&
4547 (*ctxt->space == -1))
4548 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004549 }
4550 }
4551 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004552 /* something really bad happened in the SAX callback */
4553 if (ctxt->instate != XML_PARSER_CONTENT)
4554 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004555 }
4556 count++;
4557 if (count > 50) {
4558 GROW;
4559 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004560 if (ctxt->instate == XML_PARSER_EOF)
4561 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004562 }
4563 NEXTL(l);
4564 cur = CUR_CHAR(l);
4565 }
4566 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004567 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004568 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004569 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004570 */
4571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004572 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004573 if (ctxt->sax->ignorableWhitespace != NULL)
4574 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4575 } else {
4576 if (ctxt->sax->characters != NULL)
4577 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004578 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4579 (*ctxt->space == -1))
4580 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004581 }
4582 }
4583 }
Nick Wellnhofer69936b12017-08-30 14:16:01 +02004584 if ((cur != 0) && (!IS_CHAR(cur))) {
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004585 /* Generate the error and skip the offending character */
4586 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4587 "PCDATA invalid Char value %d\n",
4588 cur);
4589 NEXTL(l);
4590 }
Owen Taylor3473f882001-02-23 17:55:21 +00004591}
4592
4593/**
4594 * xmlParseExternalID:
4595 * @ctxt: an XML parser context
4596 * @publicID: a xmlChar** receiving PubidLiteral
4597 * @strict: indicate whether we should restrict parsing to only
4598 * production [75], see NOTE below
4599 *
4600 * Parse an External ID or a Public ID
4601 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004602 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004603 * 'PUBLIC' S PubidLiteral S SystemLiteral
4604 *
4605 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4606 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4607 *
4608 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4609 *
4610 * Returns the function returns SystemLiteral and in the second
4611 * case publicID receives PubidLiteral, is strict is off
4612 * it is possible to return NULL and have publicID set.
4613 */
4614
4615xmlChar *
4616xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4617 xmlChar *URI = NULL;
4618
4619 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004620
4621 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004622 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004623 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004624 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4626 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004627 }
Owen Taylor3473f882001-02-23 17:55:21 +00004628 URI = xmlParseSystemLiteral(ctxt);
4629 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004630 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004631 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004632 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004633 SKIP(6);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004634 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004635 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004636 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004637 }
Owen Taylor3473f882001-02-23 17:55:21 +00004638 *publicID = xmlParsePubidLiteral(ctxt);
4639 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004640 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004641 }
4642 if (strict) {
4643 /*
4644 * We don't handle [83] so "S SystemLiteral" is required.
4645 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004646 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004647 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004648 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004649 }
4650 } else {
4651 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004652 * We handle [83] so we return immediately, if
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004653 * "S SystemLiteral" is not detected. We skip blanks if no
4654 * system literal was found, but this is harmless since we must
4655 * be at the end of a NotationDecl.
Owen Taylor3473f882001-02-23 17:55:21 +00004656 */
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02004657 if (SKIP_BLANKS == 0) return(NULL);
4658 if ((CUR != '\'') && (CUR != '"')) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004659 }
Owen Taylor3473f882001-02-23 17:55:21 +00004660 URI = xmlParseSystemLiteral(ctxt);
4661 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004662 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004663 }
4664 }
4665 return(URI);
4666}
4667
4668/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004669 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004670 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004671 * @buf: the already parsed part of the buffer
4672 * @len: number of bytes filles in the buffer
4673 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004674 *
4675 * Skip an XML (SGML) comment <!-- .... -->
4676 * The spec says that "For compatibility, the string "--" (double-hyphen)
4677 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004678 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004679 *
4680 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4681 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004682static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004683xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4684 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004685 int q, ql;
4686 int r, rl;
4687 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004688 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004689 int inputid;
4690
4691 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004692
Owen Taylor3473f882001-02-23 17:55:21 +00004693 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004694 len = 0;
4695 size = XML_PARSER_BUFFER_SIZE;
4696 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4697 if (buf == NULL) {
4698 xmlErrMemory(ctxt, NULL);
4699 return;
4700 }
Owen Taylor3473f882001-02-23 17:55:21 +00004701 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004702 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004703 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004704 if (q == 0)
4705 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004706 if (!IS_CHAR(q)) {
4707 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4708 "xmlParseComment: invalid xmlChar value %d\n",
4709 q);
4710 xmlFree (buf);
4711 return;
4712 }
Owen Taylor3473f882001-02-23 17:55:21 +00004713 NEXTL(ql);
4714 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004715 if (r == 0)
4716 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004717 if (!IS_CHAR(r)) {
4718 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4719 "xmlParseComment: invalid xmlChar value %d\n",
4720 q);
4721 xmlFree (buf);
4722 return;
4723 }
Owen Taylor3473f882001-02-23 17:55:21 +00004724 NEXTL(rl);
4725 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004726 if (cur == 0)
4727 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004728 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004729 ((cur != '>') ||
4730 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004731 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004732 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004733 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004734 if ((len > XML_MAX_TEXT_LENGTH) &&
4735 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4736 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4737 "Comment too big found", NULL);
4738 xmlFree (buf);
4739 return;
4740 }
Owen Taylor3473f882001-02-23 17:55:21 +00004741 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004742 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004743 size_t new_size;
4744
4745 new_size = size * 2;
4746 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004747 if (new_buf == NULL) {
4748 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004749 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004750 return;
4751 }
William M. Bracka3215c72004-07-31 16:24:01 +00004752 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004753 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004754 }
4755 COPY_BUF(ql,buf,len,q);
4756 q = r;
4757 ql = rl;
4758 r = cur;
4759 rl = l;
4760
4761 count++;
4762 if (count > 50) {
4763 GROW;
4764 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004765 if (ctxt->instate == XML_PARSER_EOF) {
4766 xmlFree(buf);
4767 return;
4768 }
Owen Taylor3473f882001-02-23 17:55:21 +00004769 }
4770 NEXTL(l);
4771 cur = CUR_CHAR(l);
4772 if (cur == 0) {
4773 SHRINK;
4774 GROW;
4775 cur = CUR_CHAR(l);
4776 }
4777 }
4778 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004779 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004780 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004781 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004782 } else if (!IS_CHAR(cur)) {
4783 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4784 "xmlParseComment: invalid xmlChar value %d\n",
4785 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004786 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004787 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004788 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02004789 "Comment doesn't start and stop in the same"
4790 " entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004791 }
4792 NEXT;
4793 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4794 (!ctxt->disableSAX))
4795 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004796 }
Daniel Veillardda629342007-08-01 07:49:06 +00004797 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004798 return;
4799not_terminated:
4800 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4801 "Comment not terminated\n", NULL);
4802 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004803 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004804}
Daniel Veillardda629342007-08-01 07:49:06 +00004805
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Skip an XML (SGML) comment <!--  .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 * Returns immediately, consuming nothing, when the input does not start
 * with "<!--".  Otherwise ctxt->instate is switched to
 * XML_PARSER_COMMENT while the comment is parsed.  An accelerated loop
 * handles ASCII content; any other byte hands the work (and the buffer
 * accumulated so far) over to xmlParseCommentComplex().  The comment
 * text is delivered through ctxt->sax->comment.
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    xmlParserInputState state;
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;
    int inputid;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    /* Save the current state so it can be put back on exit. */
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    /* Remembered to detect a comment ending in another input. */
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        /*
         * Scan bytes in 0x20..0x7F other than '-', plus tab, counting
         * columns as we go.
         */
        ccol = ctxt->input->col;
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         */
        if (nbchar > 0) {
            /* Text is only accumulated when a comment handler exists. */
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /*
                     * First chunk: if "--" follows, allocate just enough
                     * for this chunk and its terminator, otherwise leave
                     * some headroom.
                     */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    /* Not enough room for the chunk plus terminator. */
                    xmlChar *new_buf;
                    size += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /*
         * NOTE(review): unlike the allocation failures above, this exit
         * does not put ctxt->instate back to 'state' - confirm whether
         * that is intended.
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                         "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }
        /* Commit the scanned bytes. */
        ctxt->input->cur = in;
        /*
         * NOTE(review): this test looks unreachable, since a newline at
         * this position was already consumed by the loop above which
         * jumps back to get_more - confirm before relying on it.
         */
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            /*
             * CR LF: move cur onto the LF (so the CR is not copied with
             * the next chunk), count the new line and resume at the loop
             * condition with 'in' just past the LF.
             */
            in++;
            if (*in == 0xA) {
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            in--;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF) {
            xmlFree(buf);
            return;
        }
        /* SHRINK/GROW may have changed the input; reload the cursor. */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->": end of the comment. */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                                       "comment doesn't start and stop in the"
                                       " same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        /* An empty comment is reported as "". */
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    /* Do not override an EOF state set in the meantime. */
                    if (ctxt->instate != XML_PARSER_EOF)
                        ctxt->instate = state;
                    return;
                }
                /* "--" not followed by '>': report it and carry on. */
                if (buf != NULL) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
                                      buf);
                } else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment\n", NULL);
                in++;
                ctxt->input->col++;
            }
            /*
             * Step over the hyphen; cur is left untouched so the
             * hyphen(s) are copied with the next chunk.
             */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
        /* Stay on the fast path only for printable ASCII or a tab. */
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /* Anything else: let the generic routine finish the comment. */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
4975
Owen Taylor3473f882001-02-23 17:55:21 +00004976
4977/**
4978 * xmlParsePITarget:
4979 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004980 *
Owen Taylor3473f882001-02-23 17:55:21 +00004981 * parse the name of a PI
4982 *
4983 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4984 *
4985 * Returns the PITarget name or NULL
4986 */
4987
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004988const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004989xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004990 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004991
4992 name = xmlParseName(ctxt);
4993 if ((name != NULL) &&
4994 ((name[0] == 'x') || (name[0] == 'X')) &&
4995 ((name[1] == 'm') || (name[1] == 'M')) &&
4996 ((name[2] == 'l') || (name[2] == 'L'))) {
4997 int i;
4998 if ((name[0] == 'x') && (name[1] == 'm') &&
4999 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005001 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 return(name);
5003 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005004 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005005 return(name);
5006 }
5007 for (i = 0;;i++) {
5008 if (xmlW3CPIs[i] == NULL) break;
5009 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5010 return(name);
5011 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005012 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5013 "xmlParsePITarget: invalid name prefix 'xml'\n",
5014 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005015 }
Daniel Veillard37334572008-07-31 08:20:02 +00005016 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005017 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005018 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005019 }
Owen Taylor3473f882001-02-23 17:55:21 +00005020 return(name);
5021}
5022
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005023#ifdef LIBXML_CATALOG_ENABLED
5024/**
5025 * xmlParseCatalogPI:
5026 * @ctxt: an XML parser context
5027 * @catalog: the PI value string
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005028 *
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005029 * parse an XML Catalog Processing Instruction.
5030 *
5031 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5032 *
5033 * Occurs only if allowed by the user and if happening in the Misc
5034 * part of the document before any doctype informations
5035 * This will add the given catalog to the parsing context in order
5036 * to be used if there is a resolution need further down in the document
5037 */
5038
5039static void
5040xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5041 xmlChar *URL = NULL;
5042 const xmlChar *tmp, *base;
5043 xmlChar marker;
5044
5045 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00005046 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005047 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5048 goto error;
5049 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00005050 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005051 if (*tmp != '=') {
5052 return;
5053 }
5054 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005055 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005056 marker = *tmp;
5057 if ((marker != '\'') && (marker != '"'))
5058 goto error;
5059 tmp++;
5060 base = tmp;
5061 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5062 if (*tmp == 0)
5063 goto error;
5064 URL = xmlStrndup(base, tmp - base);
5065 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005066 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005067 if (*tmp != 0)
5068 goto error;
5069
5070 if (URL != NULL) {
5071 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5072 xmlFree(URL);
5073 }
5074 return;
5075
5076error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00005077 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5078 "Catalog PI syntax error: %s\n",
5079 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005080 if (URL != NULL)
5081 xmlFree(URL);
5082}
5083#endif
5084
Owen Taylor3473f882001-02-23 17:55:21 +00005085/**
5086 * xmlParsePI:
5087 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005088 *
Owen Taylor3473f882001-02-23 17:55:21 +00005089 * parse an XML Processing Instruction.
5090 *
5091 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5092 *
 * The processing is transferred to SAX once parsed.
5094 */
5095
5096void
5097xmlParsePI(xmlParserCtxtPtr ctxt) {
5098 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005099 size_t len = 0;
5100 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005101 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005102 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005103 xmlParserInputState state;
5104 int count = 0;
5105
5106 if ((RAW == '<') && (NXT(1) == '?')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005107 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005108 state = ctxt->instate;
5109 ctxt->instate = XML_PARSER_PI;
5110 /*
5111 * this is a Processing Instruction.
5112 */
5113 SKIP(2);
5114 SHRINK;
5115
5116 /*
5117 * Parse the target name and check for special support like
5118 * namespace.
5119 */
5120 target = xmlParsePITarget(ctxt);
5121 if (target != NULL) {
5122 if ((RAW == '?') && (NXT(1) == '>')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005123 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005124 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005125 "PI declaration doesn't start and stop in"
5126 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005127 }
5128 SKIP(2);
5129
5130 /*
5131 * SAX: PI detected.
5132 */
5133 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5134 (ctxt->sax->processingInstruction != NULL))
5135 ctxt->sax->processingInstruction(ctxt->userData,
5136 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005137 if (ctxt->instate != XML_PARSER_EOF)
5138 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005139 return;
5140 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005141 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005142 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005143 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005144 ctxt->instate = state;
5145 return;
5146 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005147 if (SKIP_BLANKS == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005148 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5149 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005150 }
Owen Taylor3473f882001-02-23 17:55:21 +00005151 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005152 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005153 ((cur != '?') || (NXT(1) != '>'))) {
5154 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005155 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005156 size_t new_size = size * 2;
5157 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005158 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005159 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005160 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005161 ctxt->instate = state;
5162 return;
5163 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005164 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005165 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005166 }
5167 count++;
5168 if (count > 50) {
5169 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005170 if (ctxt->instate == XML_PARSER_EOF) {
5171 xmlFree(buf);
5172 return;
5173 }
Owen Taylor3473f882001-02-23 17:55:21 +00005174 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005175 if ((len > XML_MAX_TEXT_LENGTH) &&
5176 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5177 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5178 "PI %s too big found", target);
5179 xmlFree(buf);
5180 ctxt->instate = state;
5181 return;
5182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 }
5184 COPY_BUF(l,buf,len,cur);
5185 NEXTL(l);
5186 cur = CUR_CHAR(l);
5187 if (cur == 0) {
5188 SHRINK;
5189 GROW;
5190 cur = CUR_CHAR(l);
5191 }
5192 }
Daniel Veillard51304812012-07-19 20:34:26 +08005193 if ((len > XML_MAX_TEXT_LENGTH) &&
5194 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5195 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5196 "PI %s too big found", target);
5197 xmlFree(buf);
5198 ctxt->instate = state;
5199 return;
5200 }
Owen Taylor3473f882001-02-23 17:55:21 +00005201 buf[len] = 0;
5202 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005203 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5204 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005205 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005206 if (inputid != ctxt->input->id) {
5207 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5208 "PI declaration doesn't start and stop in"
5209 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005210 }
5211 SKIP(2);
5212
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005213#ifdef LIBXML_CATALOG_ENABLED
5214 if (((state == XML_PARSER_MISC) ||
5215 (state == XML_PARSER_START)) &&
5216 (xmlStrEqual(target, XML_CATALOG_PI))) {
5217 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5218 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5219 (allow == XML_CATA_ALLOW_ALL))
5220 xmlParseCatalogPI(ctxt, buf);
5221 }
5222#endif
5223
5224
Owen Taylor3473f882001-02-23 17:55:21 +00005225 /*
5226 * SAX: PI detected.
5227 */
5228 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5229 (ctxt->sax->processingInstruction != NULL))
5230 ctxt->sax->processingInstruction(ctxt->userData,
5231 target, buf);
5232 }
5233 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005234 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005235 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005236 }
Chris Evans77404b82011-12-14 16:18:25 +08005237 if (ctxt->instate != XML_PARSER_EOF)
5238 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005239 }
5240}
5241
5242/**
5243 * xmlParseNotationDecl:
5244 * @ctxt: an XML parser context
5245 *
5246 * parse a notation declaration
5247 *
5248 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5249 *
5250 * Hence there is actually 3 choices:
5251 * 'PUBLIC' S PubidLiteral
5252 * 'PUBLIC' S PubidLiteral S SystemLiteral
5253 * and 'SYSTEM' S SystemLiteral
5254 *
5255 * See the NOTE on xmlParseExternalID().
5256 */
5257
5258void
5259xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005260 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005261 xmlChar *Pubid;
5262 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005263
Daniel Veillarda07050d2003-10-19 14:46:32 +00005264 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005265 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005266 SHRINK;
5267 SKIP(10);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005268 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005269 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5270 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005271 return;
5272 }
Owen Taylor3473f882001-02-23 17:55:21 +00005273
Daniel Veillard76d66f42001-05-16 21:05:17 +00005274 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005275 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005276 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005277 return;
5278 }
Daniel Veillard37334572008-07-31 08:20:02 +00005279 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005280 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005281 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005282 name, NULL, NULL);
5283 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005284 if (SKIP_BLANKS == 0) {
5285 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5286 "Space required after the NOTATION name'\n");
5287 return;
5288 }
Owen Taylor3473f882001-02-23 17:55:21 +00005289
5290 /*
5291 * Parse the IDs.
5292 */
5293 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5294 SKIP_BLANKS;
5295
5296 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005297 if (inputid != ctxt->input->id) {
5298 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299 "Notation declaration doesn't start and stop"
5300 " in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005301 }
5302 NEXT;
5303 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5304 (ctxt->sax->notationDecl != NULL))
5305 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5306 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005307 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 }
Owen Taylor3473f882001-02-23 17:55:21 +00005309 if (Systemid != NULL) xmlFree(Systemid);
5310 if (Pubid != NULL) xmlFree(Pubid);
5311 }
5312}
5313
5314/**
5315 * xmlParseEntityDecl:
5316 * @ctxt: an XML parser context
5317 *
5318 * parse <!ENTITY declarations
5319 *
5320 * [70] EntityDecl ::= GEDecl | PEDecl
5321 *
5322 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5323 *
5324 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5325 *
5326 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5327 *
5328 * [74] PEDef ::= EntityValue | ExternalID
5329 *
5330 * [76] NDataDecl ::= S 'NDATA' S Name
5331 *
5332 * [ VC: Notation Declared ]
5333 * The Name must match the declared name of a notation.
5334 */
5335
5336void
5337xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005338 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005339 xmlChar *value = NULL;
5340 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005341 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005342 int isParameter = 0;
5343 xmlChar *orig = NULL;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005344
Daniel Veillard4c778d82005-01-23 17:37:44 +00005345 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005346 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005347 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005348 SHRINK;
5349 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005350 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005353 }
Owen Taylor3473f882001-02-23 17:55:21 +00005354
5355 if (RAW == '%') {
5356 NEXT;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005357 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005358 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005359 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005360 }
Owen Taylor3473f882001-02-23 17:55:21 +00005361 isParameter = 1;
5362 }
5363
Daniel Veillard76d66f42001-05-16 21:05:17 +00005364 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005365 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005366 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5367 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005368 return;
5369 }
Daniel Veillard37334572008-07-31 08:20:02 +00005370 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005371 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005372 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005373 name, NULL, NULL);
5374 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005375 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005376 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5377 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005378 }
Owen Taylor3473f882001-02-23 17:55:21 +00005379
Daniel Veillardf5582f12002-06-11 10:08:16 +00005380 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005381 /*
5382 * handle the various case of definitions...
5383 */
5384 if (isParameter) {
5385 if ((RAW == '"') || (RAW == '\'')) {
5386 value = xmlParseEntityValue(ctxt, &orig);
5387 if (value) {
5388 if ((ctxt->sax != NULL) &&
5389 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5390 ctxt->sax->entityDecl(ctxt->userData, name,
5391 XML_INTERNAL_PARAMETER_ENTITY,
5392 NULL, NULL, value);
5393 }
5394 } else {
5395 URI = xmlParseExternalID(ctxt, &literal, 1);
5396 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005397 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005398 }
5399 if (URI) {
5400 xmlURIPtr uri;
5401
5402 uri = xmlParseURI((const char *) URI);
5403 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005404 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5405 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005406 /*
5407 * This really ought to be a well formedness error
5408 * but the XML Core WG decided otherwise c.f. issue
5409 * E26 of the XML erratas.
5410 */
Owen Taylor3473f882001-02-23 17:55:21 +00005411 } else {
5412 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005413 /*
5414 * Okay this is foolish to block those but not
5415 * invalid URIs.
5416 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005417 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005418 } else {
5419 if ((ctxt->sax != NULL) &&
5420 (!ctxt->disableSAX) &&
5421 (ctxt->sax->entityDecl != NULL))
5422 ctxt->sax->entityDecl(ctxt->userData, name,
5423 XML_EXTERNAL_PARAMETER_ENTITY,
5424 literal, URI, NULL);
5425 }
5426 xmlFreeURI(uri);
5427 }
5428 }
5429 }
5430 } else {
5431 if ((RAW == '"') || (RAW == '\'')) {
5432 value = xmlParseEntityValue(ctxt, &orig);
5433 if ((ctxt->sax != NULL) &&
5434 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5435 ctxt->sax->entityDecl(ctxt->userData, name,
5436 XML_INTERNAL_GENERAL_ENTITY,
5437 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005438 /*
5439 * For expat compatibility in SAX mode.
5440 */
5441 if ((ctxt->myDoc == NULL) ||
5442 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5443 if (ctxt->myDoc == NULL) {
5444 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005445 if (ctxt->myDoc == NULL) {
5446 xmlErrMemory(ctxt, "New Doc failed");
5447 return;
5448 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005449 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005450 }
5451 if (ctxt->myDoc->intSubset == NULL)
5452 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5453 BAD_CAST "fake", NULL, NULL);
5454
Daniel Veillard1af9a412003-08-20 22:54:39 +00005455 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5456 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005457 }
Owen Taylor3473f882001-02-23 17:55:21 +00005458 } else {
5459 URI = xmlParseExternalID(ctxt, &literal, 1);
5460 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005461 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005462 }
5463 if (URI) {
5464 xmlURIPtr uri;
5465
5466 uri = xmlParseURI((const char *)URI);
5467 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005468 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5469 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005470 /*
5471 * This really ought to be a well formedness error
5472 * but the XML Core WG decided otherwise c.f. issue
5473 * E26 of the XML erratas.
5474 */
Owen Taylor3473f882001-02-23 17:55:21 +00005475 } else {
5476 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005477 /*
5478 * Okay this is foolish to block those but not
5479 * invalid URIs.
5480 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005481 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005482 }
5483 xmlFreeURI(uri);
5484 }
5485 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005486 if ((RAW != '>') && (SKIP_BLANKS == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5488 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005489 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005490 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005491 SKIP(5);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005492 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005493 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005495 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005496 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5498 (ctxt->sax->unparsedEntityDecl != NULL))
5499 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5500 literal, URI, ndata);
5501 } else {
5502 if ((ctxt->sax != NULL) &&
5503 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5504 ctxt->sax->entityDecl(ctxt->userData, name,
5505 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5506 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005507 /*
5508 * For expat compatibility in SAX mode.
5509 * assuming the entity repalcement was asked for
5510 */
5511 if ((ctxt->replaceEntities != 0) &&
5512 ((ctxt->myDoc == NULL) ||
5513 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5514 if (ctxt->myDoc == NULL) {
5515 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005516 if (ctxt->myDoc == NULL) {
5517 xmlErrMemory(ctxt, "New Doc failed");
5518 return;
5519 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005520 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005521 }
5522
5523 if (ctxt->myDoc->intSubset == NULL)
5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005526 xmlSAX2EntityDecl(ctxt, name,
5527 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5528 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005529 }
Owen Taylor3473f882001-02-23 17:55:21 +00005530 }
5531 }
5532 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005533 if (ctxt->instate == XML_PARSER_EOF)
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005534 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +00005535 SKIP_BLANKS;
5536 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005537 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005538 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005539 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005540 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005541 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005542 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005543 "Entity declaration doesn't start and stop in"
5544 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005545 }
5546 NEXT;
5547 }
5548 if (orig != NULL) {
5549 /*
5550 * Ugly mechanism to save the raw entity value.
5551 */
5552 xmlEntityPtr cur = NULL;
5553
5554 if (isParameter) {
5555 if ((ctxt->sax != NULL) &&
5556 (ctxt->sax->getParameterEntity != NULL))
5557 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5558 } else {
5559 if ((ctxt->sax != NULL) &&
5560 (ctxt->sax->getEntity != NULL))
5561 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005562 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005563 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005564 }
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005566 if ((cur != NULL) && (cur->orig == NULL)) {
5567 cur->orig = orig;
5568 orig = NULL;
5569 }
Owen Taylor3473f882001-02-23 17:55:21 +00005570 }
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005571
5572done:
Owen Taylor3473f882001-02-23 17:55:21 +00005573 if (value != NULL) xmlFree(value);
5574 if (URI != NULL) xmlFree(URI);
5575 if (literal != NULL) xmlFree(literal);
Nick Wellnhoferbedbef82017-06-09 15:10:13 +02005576 if (orig != NULL) xmlFree(orig);
Owen Taylor3473f882001-02-23 17:55:21 +00005577 }
5578}
5579
5580/**
5581 * xmlParseDefaultDecl:
5582 * @ctxt: an XML parser context
5583 * @value: Receive a possible fixed default value for the attribute
5584 *
5585 * Parse an attribute default declaration
5586 *
5587 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5588 *
5589 * [ VC: Required Attribute ]
5590 * if the default declaration is the keyword #REQUIRED, then the
5591 * attribute must be specified for all elements of the type in the
5592 * attribute-list declaration.
5593 *
5594 * [ VC: Attribute Default Legal ]
5595 * The declared default value must meet the lexical constraints of
5596 * the declared attribute type c.f. xmlValidateAttributeDecl()
5597 *
5598 * [ VC: Fixed Attribute Default ]
5599 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005600 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005601 *
5602 * [ WFC: No < in Attribute Values ]
5603 * handled in xmlParseAttValue()
5604 *
5605 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005606 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005607 */
5608
5609int
5610xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5611 int val;
5612 xmlChar *ret;
5613
5614 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005615 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005616 SKIP(9);
5617 return(XML_ATTRIBUTE_REQUIRED);
5618 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005619 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005620 SKIP(8);
5621 return(XML_ATTRIBUTE_IMPLIED);
5622 }
5623 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005624 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005625 SKIP(6);
5626 val = XML_ATTRIBUTE_FIXED;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005627 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005630 }
Owen Taylor3473f882001-02-23 17:55:21 +00005631 }
5632 ret = xmlParseAttValue(ctxt);
5633 ctxt->instate = XML_PARSER_DTD;
5634 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005635 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005636 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005637 } else
5638 *value = ret;
5639 return(val);
5640}
5641
5642/**
5643 * xmlParseNotationType:
5644 * @ctxt: an XML parser context
5645 *
5646 * parse an Notation attribute type.
5647 *
5648 * Note: the leading 'NOTATION' S part has already being parsed...
5649 *
5650 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5651 *
5652 * [ VC: Notation Attributes ]
5653 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005654 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005655 *
5656 * Returns: the notation attribute tree built while parsing
5657 */
5658
5659xmlEnumerationPtr
5660xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005661 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005662 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005663
5664 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005665 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005666 return(NULL);
5667 }
5668 SHRINK;
5669 do {
5670 NEXT;
5671 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005672 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005673 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005674 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5675 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005676 xmlFreeEnumeration(ret);
5677 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005678 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005679 tmp = ret;
5680 while (tmp != NULL) {
5681 if (xmlStrEqual(name, tmp->name)) {
5682 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5683 "standalone: attribute notation value token %s duplicated\n",
5684 name, NULL);
5685 if (!xmlDictOwns(ctxt->dict, name))
5686 xmlFree((xmlChar *) name);
5687 break;
5688 }
5689 tmp = tmp->next;
5690 }
5691 if (tmp == NULL) {
5692 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005693 if (cur == NULL) {
5694 xmlFreeEnumeration(ret);
5695 return(NULL);
5696 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005697 if (last == NULL) ret = last = cur;
5698 else {
5699 last->next = cur;
5700 last = cur;
5701 }
Owen Taylor3473f882001-02-23 17:55:21 +00005702 }
5703 SKIP_BLANKS;
5704 } while (RAW == '|');
5705 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005706 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005707 xmlFreeEnumeration(ret);
5708 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005709 }
5710 NEXT;
5711 return(ret);
5712}
5713
5714/**
5715 * xmlParseEnumerationType:
5716 * @ctxt: an XML parser context
5717 *
5718 * parse an Enumeration attribute type.
5719 *
5720 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5721 *
5722 * [ VC: Enumeration ]
5723 * Values of this type must match one of the Nmtoken tokens in
5724 * the declaration
5725 *
5726 * Returns: the enumeration attribute tree built while parsing
5727 */
5728
5729xmlEnumerationPtr
5730xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5731 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005732 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005733
5734 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005735 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005736 return(NULL);
5737 }
5738 SHRINK;
5739 do {
5740 NEXT;
5741 SKIP_BLANKS;
5742 name = xmlParseNmtoken(ctxt);
5743 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005744 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 return(ret);
5746 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005747 tmp = ret;
5748 while (tmp != NULL) {
5749 if (xmlStrEqual(name, tmp->name)) {
5750 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5751 "standalone: attribute enumeration value token %s duplicated\n",
5752 name, NULL);
5753 if (!xmlDictOwns(ctxt->dict, name))
5754 xmlFree(name);
5755 break;
5756 }
5757 tmp = tmp->next;
5758 }
5759 if (tmp == NULL) {
5760 cur = xmlCreateEnumeration(name);
5761 if (!xmlDictOwns(ctxt->dict, name))
5762 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005763 if (cur == NULL) {
5764 xmlFreeEnumeration(ret);
5765 return(NULL);
5766 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005767 if (last == NULL) ret = last = cur;
5768 else {
5769 last->next = cur;
5770 last = cur;
5771 }
Owen Taylor3473f882001-02-23 17:55:21 +00005772 }
5773 SKIP_BLANKS;
5774 } while (RAW == '|');
5775 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005776 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005777 return(ret);
5778 }
5779 NEXT;
5780 return(ret);
5781}
5782
5783/**
5784 * xmlParseEnumeratedType:
5785 * @ctxt: an XML parser context
5786 * @tree: the enumeration tree built while parsing
5787 *
5788 * parse an Enumerated attribute type.
5789 *
5790 * [57] EnumeratedType ::= NotationType | Enumeration
5791 *
5792 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5793 *
5794 *
5795 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5796 */
5797
5798int
5799xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005800 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005801 SKIP(8);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005802 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005803 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5804 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005805 return(0);
5806 }
Owen Taylor3473f882001-02-23 17:55:21 +00005807 *tree = xmlParseNotationType(ctxt);
5808 if (*tree == NULL) return(0);
5809 return(XML_ATTRIBUTE_NOTATION);
5810 }
5811 *tree = xmlParseEnumerationType(ctxt);
5812 if (*tree == NULL) return(0);
5813 return(XML_ATTRIBUTE_ENUMERATION);
5814}
5815
5816/**
5817 * xmlParseAttributeType:
5818 * @ctxt: an XML parser context
5819 * @tree: the enumeration tree built while parsing
5820 *
5821 * parse the Attribute list def for an element
5822 *
5823 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5824 *
5825 * [55] StringType ::= 'CDATA'
5826 *
5827 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5828 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5829 *
5830 * Validity constraints for attribute values syntax are checked in
5831 * xmlValidateAttributeValue()
5832 *
5833 * [ VC: ID ]
5834 * Values of type ID must match the Name production. A name must not
5835 * appear more than once in an XML document as a value of this type;
5836 * i.e., ID values must uniquely identify the elements which bear them.
5837 *
5838 * [ VC: One ID per Element Type ]
5839 * No element type may have more than one ID attribute specified.
5840 *
5841 * [ VC: ID Attribute Default ]
5842 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5843 *
5844 * [ VC: IDREF ]
5845 * Values of type IDREF must match the Name production, and values
5846 * of type IDREFS must match Names; each IDREF Name must match the value
5847 * of an ID attribute on some element in the XML document; i.e. IDREF
5848 * values must match the value of some ID attribute.
5849 *
5850 * [ VC: Entity Name ]
5851 * Values of type ENTITY must match the Name production, values
5852 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005853 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005854 *
5855 * [ VC: Name Token ]
5856 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005857 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005858 *
5859 * Returns the attribute type
5860 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005861int
Owen Taylor3473f882001-02-23 17:55:21 +00005862xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5863 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005864 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005865 SKIP(5);
5866 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005867 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005868 SKIP(6);
5869 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005870 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005871 SKIP(5);
5872 return(XML_ATTRIBUTE_IDREF);
5873 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5874 SKIP(2);
5875 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005876 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005877 SKIP(6);
5878 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005879 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005880 SKIP(8);
5881 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005882 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005883 SKIP(8);
5884 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005885 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005886 SKIP(7);
5887 return(XML_ATTRIBUTE_NMTOKEN);
5888 }
5889 return(xmlParseEnumeratedType(ctxt, tree));
5890}
5891
5892/**
5893 * xmlParseAttributeListDecl:
5894 * @ctxt: an XML parser context
5895 *
5896 * : parse the Attribute list def for an element
5897 *
5898 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5899 *
5900 * [53] AttDef ::= S Name S AttType S DefaultDecl
5901 *
5902 */
5903void
5904xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005905 const xmlChar *elemName;
5906 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005907 xmlEnumerationPtr tree;
5908
Daniel Veillarda07050d2003-10-19 14:46:32 +00005909 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02005910 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005911
5912 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005913 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005914 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005915 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005916 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00005917 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005918 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005919 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5920 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005921 return;
5922 }
5923 SKIP_BLANKS;
5924 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005925 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005926 int type;
5927 int def;
5928 xmlChar *defaultValue = NULL;
5929
5930 GROW;
5931 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005932 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005933 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005934 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5935 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005936 break;
5937 }
5938 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005939 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005940 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005941 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005942 break;
5943 }
Owen Taylor3473f882001-02-23 17:55:21 +00005944
5945 type = xmlParseAttributeType(ctxt, &tree);
5946 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005947 break;
5948 }
5949
5950 GROW;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005951 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005952 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5953 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005954 if (tree != NULL)
5955 xmlFreeEnumeration(tree);
5956 break;
5957 }
Owen Taylor3473f882001-02-23 17:55:21 +00005958
5959 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5960 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005961 if (defaultValue != NULL)
5962 xmlFree(defaultValue);
5963 if (tree != NULL)
5964 xmlFreeEnumeration(tree);
5965 break;
5966 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005967 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5968 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005969
5970 GROW;
5971 if (RAW != '>') {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02005972 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005973 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005974 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005975 if (defaultValue != NULL)
5976 xmlFree(defaultValue);
5977 if (tree != NULL)
5978 xmlFreeEnumeration(tree);
5979 break;
5980 }
Owen Taylor3473f882001-02-23 17:55:21 +00005981 }
Owen Taylor3473f882001-02-23 17:55:21 +00005982 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5983 (ctxt->sax->attributeDecl != NULL))
5984 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5985 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005986 else if (tree != NULL)
5987 xmlFreeEnumeration(tree);
5988
5989 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005990 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00005991 (def != XML_ATTRIBUTE_REQUIRED)) {
5992 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5993 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005994 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005995 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5996 }
Owen Taylor3473f882001-02-23 17:55:21 +00005997 if (defaultValue != NULL)
5998 xmlFree(defaultValue);
5999 GROW;
6000 }
6001 if (RAW == '>') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006002 if (inputid != ctxt->input->id) {
6003 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6004 "Attribute list declaration doesn't start and"
6005 " stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006006 }
6007 NEXT;
6008 }
Owen Taylor3473f882001-02-23 17:55:21 +00006009 }
6010}
6011
6012/**
6013 * xmlParseElementMixedContentDecl:
6014 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006015 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006016 *
6017 * parse the declaration for a Mixed Element content
6018 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006019 *
Owen Taylor3473f882001-02-23 17:55:21 +00006020 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6021 * '(' S? '#PCDATA' S? ')'
6022 *
6023 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6024 *
6025 * [ VC: No Duplicate Types ]
6026 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006027 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006028 *
6029 * returns: the list of the xmlElementContentPtr describing the element choices
6030 */
6031xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006032xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006033 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006034 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006035
6036 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006037 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006038 SKIP(7);
6039 SKIP_BLANKS;
6040 SHRINK;
6041 if (RAW == ')') {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006042 if (ctxt->input->id != inputchk) {
6043 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6044 "Element content declaration doesn't start and"
6045 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006046 }
Owen Taylor3473f882001-02-23 17:55:21 +00006047 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006048 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006049 if (ret == NULL)
6050 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006051 if (RAW == '*') {
6052 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6053 NEXT;
6054 }
6055 return(ret);
6056 }
6057 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006058 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006059 if (ret == NULL) return(NULL);
6060 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006061 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006062 NEXT;
6063 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006064 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006065 if (ret == NULL) return(NULL);
6066 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006067 if (cur != NULL)
6068 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006069 cur = ret;
6070 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006071 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006072 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006073 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006074 if (n->c1 != NULL)
6075 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006076 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006077 if (n != NULL)
6078 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006079 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006080 }
6081 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006082 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006083 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006084 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006085 "xmlParseElementMixedContentDecl : Name expected\n");
Nick Wellnhofer8627e4e2017-05-23 18:11:08 +02006086 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006087 return(NULL);
6088 }
6089 SKIP_BLANKS;
6090 GROW;
6091 }
6092 if ((RAW == ')') && (NXT(1) == '*')) {
6093 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006094 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006095 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006096 if (cur->c2 != NULL)
6097 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006098 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006099 if (ret != NULL)
6100 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006101 if (ctxt->input->id != inputchk) {
6102 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6103 "Element content declaration doesn't start and"
6104 " stop in the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006105 }
Owen Taylor3473f882001-02-23 17:55:21 +00006106 SKIP(2);
6107 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006108 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006109 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 return(NULL);
6111 }
6112
6113 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006114 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006115 }
6116 return(ret);
6117}
6118
6119/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006120 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006121 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006122 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006123 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006124 *
6125 * parse the declaration for a Mixed Element content
6126 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006127 *
Owen Taylor3473f882001-02-23 17:55:21 +00006128 *
6129 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6130 *
6131 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6132 *
6133 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6134 *
6135 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6136 *
6137 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6138 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006139 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006140 * opening or closing parentheses in a choice, seq, or Mixed
6141 * construct is contained in the replacement text for a parameter
6142 * entity, both must be contained in the same replacement text. For
6143 * interoperability, if a parameter-entity reference appears in a
6144 * choice, seq, or Mixed construct, its replacement text should not
6145 * be empty, and neither the first nor last non-blank character of
6146 * the replacement text should be a connector (| or ,).
6147 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006148 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006149 * hierarchy.
6150 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006151static xmlElementContentPtr
6152xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6153 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006154 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006155 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006156 xmlChar type = 0;
6157
Daniel Veillard489f9672009-08-10 16:49:30 +02006158 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6159 (depth > 2048)) {
6160 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6161"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6162 depth);
6163 return(NULL);
6164 }
Owen Taylor3473f882001-02-23 17:55:21 +00006165 SKIP_BLANKS;
6166 GROW;
6167 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006168 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006169
Owen Taylor3473f882001-02-23 17:55:21 +00006170 /* Recurse on first child */
6171 NEXT;
6172 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006173 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6174 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006175 SKIP_BLANKS;
6176 GROW;
6177 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006178 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006179 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006180 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006181 return(NULL);
6182 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006183 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006184 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006185 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006186 return(NULL);
6187 }
Owen Taylor3473f882001-02-23 17:55:21 +00006188 GROW;
6189 if (RAW == '?') {
6190 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6191 NEXT;
6192 } else if (RAW == '*') {
6193 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6194 NEXT;
6195 } else if (RAW == '+') {
6196 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6197 NEXT;
6198 } else {
6199 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6200 }
Owen Taylor3473f882001-02-23 17:55:21 +00006201 GROW;
6202 }
6203 SKIP_BLANKS;
6204 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006205 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006206 /*
6207 * Each loop we parse one separator and one element.
6208 */
6209 if (RAW == ',') {
6210 if (type == 0) type = CUR;
6211
6212 /*
6213 * Detect "Name | Name , Name" error
6214 */
6215 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006216 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006217 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006218 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006219 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006220 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006221 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006222 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006223 return(NULL);
6224 }
6225 NEXT;
6226
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006227 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006228 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006229 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006230 xmlFreeDocElementContent(ctxt->myDoc, last);
6231 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006232 return(NULL);
6233 }
6234 if (last == NULL) {
6235 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006236 if (ret != NULL)
6237 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006238 ret = cur = op;
6239 } else {
6240 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006241 if (op != NULL)
6242 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006243 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006244 if (last != NULL)
6245 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006246 cur =op;
6247 last = NULL;
6248 }
6249 } else if (RAW == '|') {
6250 if (type == 0) type = CUR;
6251
6252 /*
6253 * Detect "Name , Name | Name" error
6254 */
6255 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006256 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006257 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006258 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006259 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006260 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006261 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006262 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006263 return(NULL);
6264 }
6265 NEXT;
6266
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006267 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006269 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006270 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006271 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006272 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006273 return(NULL);
6274 }
6275 if (last == NULL) {
6276 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006277 if (ret != NULL)
6278 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006279 ret = cur = op;
6280 } else {
6281 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006282 if (op != NULL)
6283 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006284 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006285 if (last != NULL)
6286 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006287 cur =op;
6288 last = NULL;
6289 }
6290 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006291 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006292 if ((last != NULL) && (last != ret))
6293 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006294 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006295 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006296 return(NULL);
6297 }
6298 GROW;
6299 SKIP_BLANKS;
6300 GROW;
6301 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006302 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006303 /* Recurse on second child */
6304 NEXT;
6305 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006306 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6307 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006308 SKIP_BLANKS;
6309 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006310 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006311 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006312 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006313 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006314 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006315 return(NULL);
6316 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006317 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006318 if (last == NULL) {
6319 if (ret != NULL)
6320 xmlFreeDocElementContent(ctxt->myDoc, ret);
6321 return(NULL);
6322 }
Owen Taylor3473f882001-02-23 17:55:21 +00006323 if (RAW == '?') {
6324 last->ocur = XML_ELEMENT_CONTENT_OPT;
6325 NEXT;
6326 } else if (RAW == '*') {
6327 last->ocur = XML_ELEMENT_CONTENT_MULT;
6328 NEXT;
6329 } else if (RAW == '+') {
6330 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6331 NEXT;
6332 } else {
6333 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6334 }
6335 }
6336 SKIP_BLANKS;
6337 GROW;
6338 }
6339 if ((cur != NULL) && (last != NULL)) {
6340 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006341 if (last != NULL)
6342 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006343 }
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006344 if (ctxt->input->id != inputchk) {
6345 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6346 "Element content declaration doesn't start and stop in"
6347 " the same entity\n");
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006348 }
Owen Taylor3473f882001-02-23 17:55:21 +00006349 NEXT;
6350 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006351 if (ret != NULL) {
6352 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6353 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6354 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6355 else
6356 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6357 }
Owen Taylor3473f882001-02-23 17:55:21 +00006358 NEXT;
6359 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006360 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006361 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006362 cur = ret;
6363 /*
6364 * Some normalization:
6365 * (a | b* | c?)* == (a | b | c)*
6366 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006367 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006368 if ((cur->c1 != NULL) &&
6369 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6370 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6371 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6372 if ((cur->c2 != NULL) &&
6373 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6374 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6375 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6376 cur = cur->c2;
6377 }
6378 }
Owen Taylor3473f882001-02-23 17:55:21 +00006379 NEXT;
6380 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006381 if (ret != NULL) {
6382 int found = 0;
6383
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006384 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6385 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6386 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006387 else
6388 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006389 /*
6390 * Some normalization:
6391 * (a | b*)+ == (a | b)*
6392 * (a | b?)+ == (a | b)*
6393 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006394 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006395 if ((cur->c1 != NULL) &&
6396 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6397 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6398 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6399 found = 1;
6400 }
6401 if ((cur->c2 != NULL) &&
6402 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6403 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6404 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6405 found = 1;
6406 }
6407 cur = cur->c2;
6408 }
6409 if (found)
6410 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6411 }
Owen Taylor3473f882001-02-23 17:55:21 +00006412 NEXT;
6413 }
6414 return(ret);
6415}
6416
6417/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006418 * xmlParseElementChildrenContentDecl:
6419 * @ctxt: an XML parser context
6420 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006421 *
6422 * parse the declaration for a Mixed Element content
6423 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6424 *
6425 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6426 *
6427 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6428 *
6429 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6430 *
6431 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6432 *
6433 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6434 * TODO Parameter-entity replacement text must be properly nested
6435 * with parenthesized groups. That is to say, if either of the
6436 * opening or closing parentheses in a choice, seq, or Mixed
6437 * construct is contained in the replacement text for a parameter
6438 * entity, both must be contained in the same replacement text. For
6439 * interoperability, if a parameter-entity reference appears in a
6440 * choice, seq, or Mixed construct, its replacement text should not
6441 * be empty, and neither the first nor last non-blank character of
6442 * the replacement text should be a connector (| or ,).
6443 *
6444 * Returns the tree of xmlElementContentPtr describing the element
6445 * hierarchy.
6446 */
6447xmlElementContentPtr
6448xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6449 /* stub left for API/ABI compat */
6450 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6451}
6452
6453/**
Owen Taylor3473f882001-02-23 17:55:21 +00006454 * xmlParseElementContentDecl:
6455 * @ctxt: an XML parser context
6456 * @name: the name of the element being defined.
6457 * @result: the Element Content pointer will be stored here if any
6458 *
6459 * parse the declaration for an Element content either Mixed or Children,
6460 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006461 *
Owen Taylor3473f882001-02-23 17:55:21 +00006462 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6463 *
6464 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6465 */
6466
6467int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006468xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006469 xmlElementContentPtr *result) {
6470
6471 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006472 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006473 int res;
6474
6475 *result = NULL;
6476
6477 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006478 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006479 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006480 return(-1);
6481 }
6482 NEXT;
6483 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006484 if (ctxt->instate == XML_PARSER_EOF)
6485 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006486 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006487 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006488 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006489 res = XML_ELEMENT_TYPE_MIXED;
6490 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006491 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 res = XML_ELEMENT_TYPE_ELEMENT;
6493 }
Owen Taylor3473f882001-02-23 17:55:21 +00006494 SKIP_BLANKS;
6495 *result = tree;
6496 return(res);
6497}
6498
6499/**
6500 * xmlParseElementDecl:
6501 * @ctxt: an XML parser context
6502 *
6503 * parse an Element declaration.
6504 *
6505 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6506 *
6507 * [ VC: Unique Element Type Declaration ]
6508 * No element type may be declared more than once
6509 *
6510 * Returns the type of the element, or -1 in case of error
6511 */
6512int
6513xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006514 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006515 int ret = -1;
6516 xmlElementContentPtr content = NULL;
6517
Daniel Veillard4c778d82005-01-23 17:37:44 +00006518 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006519 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006520 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006521
6522 SKIP(9);
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006523 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006524 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6525 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006526 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006527 }
Daniel Veillard76d66f42001-05-16 21:05:17 +00006528 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006529 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006530 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6531 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006532 return(-1);
6533 }
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006534 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006535 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6536 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006537 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00006538 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006539 SKIP(5);
6540 /*
6541 * Element must always be empty.
6542 */
6543 ret = XML_ELEMENT_TYPE_EMPTY;
6544 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6545 (NXT(2) == 'Y')) {
6546 SKIP(3);
6547 /*
6548 * Element is a generic container.
6549 */
6550 ret = XML_ELEMENT_TYPE_ANY;
6551 } else if (RAW == '(') {
6552 ret = xmlParseElementContentDecl(ctxt, name, &content);
6553 } else {
6554 /*
6555 * [ WFC: PEs in Internal Subset ] error handling.
6556 */
6557 if ((RAW == '%') && (ctxt->external == 0) &&
6558 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006559 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006560 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006561 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006562 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006563 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6564 }
Owen Taylor3473f882001-02-23 17:55:21 +00006565 return(-1);
6566 }
6567
6568 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006569
6570 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006571 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006572 if (content != NULL) {
6573 xmlFreeDocElementContent(ctxt->myDoc, content);
6574 }
Owen Taylor3473f882001-02-23 17:55:21 +00006575 } else {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006576 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006577 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006578 "Element declaration doesn't start and stop in"
6579 " the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006580 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006581
Owen Taylor3473f882001-02-23 17:55:21 +00006582 NEXT;
6583 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006584 (ctxt->sax->elementDecl != NULL)) {
6585 if (content != NULL)
6586 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006587 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6588 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006589 if ((content != NULL) && (content->parent == NULL)) {
6590 /*
6591 * this is a trick: if xmlAddElementDecl is called,
6592 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006593 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006594 * interfaces or change the API/ABI
6595 */
6596 xmlFreeDocElementContent(ctxt->myDoc, content);
6597 }
6598 } else if (content != NULL) {
6599 xmlFreeDocElementContent(ctxt->myDoc, content);
6600 }
Owen Taylor3473f882001-02-23 17:55:21 +00006601 }
Owen Taylor3473f882001-02-23 17:55:21 +00006602 }
6603 return(ret);
6604}
6605
6606/**
Owen Taylor3473f882001-02-23 17:55:21 +00006607 * xmlParseConditionalSections
6608 * @ctxt: an XML parser context
6609 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006610 * [61] conditionalSect ::= includeSect | ignoreSect
6611 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006612 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6613 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6614 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6615 */
6616
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006617static void
Owen Taylor3473f882001-02-23 17:55:21 +00006618xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006619 int id = ctxt->input->id;
6620
Owen Taylor3473f882001-02-23 17:55:21 +00006621 SKIP(3);
6622 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006623 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006624 SKIP(7);
6625 SKIP_BLANKS;
6626 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006627 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006628 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006629 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006630 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006631 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006632 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6633 "All markup of the conditional section is not"
6634 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006635 }
Owen Taylor3473f882001-02-23 17:55:21 +00006636 NEXT;
6637 }
6638 if (xmlParserDebugEntities) {
6639 if ((ctxt->input != NULL) && (ctxt->input->filename))
6640 xmlGenericError(xmlGenericErrorContext,
6641 "%s(%d): ", ctxt->input->filename,
6642 ctxt->input->line);
6643 xmlGenericError(xmlGenericErrorContext,
6644 "Entering INCLUDE Conditional Section\n");
6645 }
6646
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006647 SKIP_BLANKS;
6648 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006649 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6650 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006651 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006652 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006653
6654 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6655 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006656 } else
6657 xmlParseMarkupDecl(ctxt);
6658
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006659 SKIP_BLANKS;
6660 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006661
Daniel Veillardfdc91562002-07-01 21:52:03 +00006662 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006663 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006664 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006665 break;
6666 }
6667 }
6668 if (xmlParserDebugEntities) {
6669 if ((ctxt->input != NULL) && (ctxt->input->filename))
6670 xmlGenericError(xmlGenericErrorContext,
6671 "%s(%d): ", ctxt->input->filename,
6672 ctxt->input->line);
6673 xmlGenericError(xmlGenericErrorContext,
6674 "Leaving INCLUDE Conditional Section\n");
6675 }
6676
Daniel Veillarda07050d2003-10-19 14:46:32 +00006677 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006678 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006679 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006680 int depth = 0;
6681
6682 SKIP(6);
6683 SKIP_BLANKS;
6684 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006685 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006686 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006687 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006688 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006689 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6691 "All markup of the conditional section is not"
6692 " in the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006693 }
Owen Taylor3473f882001-02-23 17:55:21 +00006694 NEXT;
6695 }
6696 if (xmlParserDebugEntities) {
6697 if ((ctxt->input != NULL) && (ctxt->input->filename))
6698 xmlGenericError(xmlGenericErrorContext,
6699 "%s(%d): ", ctxt->input->filename,
6700 ctxt->input->line);
6701 xmlGenericError(xmlGenericErrorContext,
6702 "Entering IGNORE Conditional Section\n");
6703 }
6704
6705 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006706 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006707 * But disable SAX event generating DTD building in the meantime
6708 */
6709 state = ctxt->disableSAX;
6710 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006711 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006712 ctxt->instate = XML_PARSER_IGNORE;
6713
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006714 while (((depth >= 0) && (RAW != 0)) &&
6715 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006716 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6717 depth++;
6718 SKIP(3);
6719 continue;
6720 }
6721 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6722 if (--depth >= 0) SKIP(3);
6723 continue;
6724 }
6725 NEXT;
6726 continue;
6727 }
6728
6729 ctxt->disableSAX = state;
6730 ctxt->instate = instate;
6731
6732 if (xmlParserDebugEntities) {
6733 if ((ctxt->input != NULL) && (ctxt->input->filename))
6734 xmlGenericError(xmlGenericErrorContext,
6735 "%s(%d): ", ctxt->input->filename,
6736 ctxt->input->line);
6737 xmlGenericError(xmlGenericErrorContext,
6738 "Leaving IGNORE Conditional Section\n");
6739 }
6740
6741 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006742 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006743 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006744 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006745 }
6746
6747 if (RAW == 0)
6748 SHRINK;
6749
6750 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006751 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006752 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006753 if (ctxt->input->id != id) {
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02006754 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6755 "All markup of the conditional section is not in"
6756 " the same entity\n");
Daniel Veillard49d44052008-08-27 19:57:06 +00006757 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006758 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006759 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006760 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006761 }
6762}
6763
6764/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006765 * xmlParseMarkupDecl:
6766 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006767 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006768 * parse Markup declarations
6769 *
6770 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6771 * NotationDecl | PI | Comment
6772 *
6773 * [ VC: Proper Declaration/PE Nesting ]
6774 * Parameter-entity replacement text must be properly nested with
6775 * markup declarations. That is to say, if either the first character
6776 * or the last character of a markup declaration (markupdecl above) is
6777 * contained in the replacement text for a parameter-entity reference,
6778 * both must be contained in the same replacement text.
6779 *
6780 * [ WFC: PEs in Internal Subset ]
6781 * In the internal DTD subset, parameter-entity references can occur
6782 * only where markup declarations can occur, not within markup declarations.
6783 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006784 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006785 */
6786void
6787xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6788 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006789 if (CUR == '<') {
6790 if (NXT(1) == '!') {
6791 switch (NXT(2)) {
6792 case 'E':
6793 if (NXT(3) == 'L')
6794 xmlParseElementDecl(ctxt);
6795 else if (NXT(3) == 'N')
6796 xmlParseEntityDecl(ctxt);
6797 break;
6798 case 'A':
6799 xmlParseAttributeListDecl(ctxt);
6800 break;
6801 case 'N':
6802 xmlParseNotationDecl(ctxt);
6803 break;
6804 case '-':
6805 xmlParseComment(ctxt);
6806 break;
6807 default:
6808 /* there is an error but it will be detected later */
6809 break;
6810 }
6811 } else if (NXT(1) == '?') {
6812 xmlParsePI(ctxt);
6813 }
6814 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006815
6816 /*
6817 * detect requirement to exit there and act accordingly
6818 * and avoid having instate overriden later on
6819 */
6820 if (ctxt->instate == XML_PARSER_EOF)
6821 return;
6822
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006823 /*
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006824 * Conditional sections are allowed from entities included
6825 * by PE References in the internal subset.
6826 */
6827 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6828 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6829 xmlParseConditionalSections(ctxt);
6830 }
6831 }
6832
6833 ctxt->instate = XML_PARSER_DTD;
6834}
6835
6836/**
6837 * xmlParseTextDecl:
6838 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006839 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006840 * parse an XML declaration header for external entities
6841 *
6842 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006843 */
6844
6845void
6846xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6847 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006848 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006849
6850 /*
6851 * We know that '<?xml' is here.
6852 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006853 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006854 SKIP(5);
6855 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006856 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006857 return;
6858 }
6859
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006860 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006861 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6862 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006863 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006864
6865 /*
6866 * We may have the VersionInfo here.
6867 */
6868 version = xmlParseVersionInfo(ctxt);
6869 if (version == NULL)
6870 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006871 else {
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02006872 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6874 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006875 }
6876 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006877 ctxt->input->version = version;
6878
6879 /*
6880 * We must have the encoding declaration
6881 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006882 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006883 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6884 /*
6885 * The XML REC instructs us to stop parsing right here
6886 */
6887 return;
6888 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006889 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6890 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6891 "Missing encoding in text declaration\n");
6892 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006893
6894 SKIP_BLANKS;
6895 if ((RAW == '?') && (NXT(1) == '>')) {
6896 SKIP(2);
6897 } else if (RAW == '>') {
6898 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006899 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006900 NEXT;
6901 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006902 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006903 MOVETO_ENDTAG(CUR_PTR);
6904 NEXT;
6905 }
6906}
6907
6908/**
Owen Taylor3473f882001-02-23 17:55:21 +00006909 * xmlParseExternalSubset:
6910 * @ctxt: an XML parser context
6911 * @ExternalID: the external identifier
6912 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006913 *
Owen Taylor3473f882001-02-23 17:55:21 +00006914 * parse Markup declarations from an external subset
6915 *
6916 * [30] extSubset ::= textDecl? extSubsetDecl
6917 *
6918 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6919 */
6920void
6921xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6922 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006923 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006924 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006925
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006926 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006927 (ctxt->input->end - ctxt->input->cur >= 4)) {
6928 xmlChar start[4];
6929 xmlCharEncoding enc;
6930
6931 start[0] = RAW;
6932 start[1] = NXT(1);
6933 start[2] = NXT(2);
6934 start[3] = NXT(3);
6935 enc = xmlDetectCharEncoding(start, 4);
6936 if (enc != XML_CHAR_ENCODING_NONE)
6937 xmlSwitchEncoding(ctxt, enc);
6938 }
6939
Daniel Veillarda07050d2003-10-19 14:46:32 +00006940 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006941 xmlParseTextDecl(ctxt);
6942 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6943 /*
6944 * The XML REC instructs us to stop parsing right here
6945 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08006946 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006947 return;
6948 }
6949 }
6950 if (ctxt->myDoc == NULL) {
6951 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006952 if (ctxt->myDoc == NULL) {
6953 xmlErrMemory(ctxt, "New Doc failed");
6954 return;
6955 }
6956 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006957 }
6958 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6959 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6960
6961 ctxt->instate = XML_PARSER_DTD;
6962 ctxt->external = 1;
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006963 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006964 while (((RAW == '<') && (NXT(1) == '?')) ||
6965 ((RAW == '<') && (NXT(1) == '!')) ||
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006966 (RAW == '%')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006967 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006968 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006969
6970 GROW;
6971 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6972 xmlParseConditionalSections(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006973 } else
6974 xmlParseMarkupDecl(ctxt);
Nick Wellnhofer453dff12017-06-19 17:55:20 +02006975 SKIP_BLANKS;
Owen Taylor3473f882001-02-23 17:55:21 +00006976
Daniel Veillardfdc91562002-07-01 21:52:03 +00006977 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006978 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006979 break;
6980 }
6981 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006982
Owen Taylor3473f882001-02-23 17:55:21 +00006983 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006984 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006985 }
6986
6987}
6988
6989/**
6990 * xmlParseReference:
6991 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006992 *
Owen Taylor3473f882001-02-23 17:55:21 +00006993 * parse and handle entity references in content, depending on the SAX
6994 * interface, this may end-up in a call to character() if this is a
6995 * CharRef, a predefined entity, if there is no reference() callback.
6996 * or if the parser was asked to switch to that mode.
6997 *
6998 * [67] Reference ::= EntityRef | CharRef
6999 */
7000void
7001xmlParseReference(xmlParserCtxtPtr ctxt) {
7002 xmlEntityPtr ent;
7003 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007004 int was_checked;
7005 xmlNodePtr list = NULL;
7006 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007007
Daniel Veillard0161e632008-08-28 15:36:32 +00007008
7009 if (RAW != '&')
7010 return;
7011
7012 /*
7013 * Simple case of a CharRef
7014 */
Owen Taylor3473f882001-02-23 17:55:21 +00007015 if (NXT(1) == '#') {
7016 int i = 0;
7017 xmlChar out[10];
7018 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007019 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007020
Daniel Veillarddc171602008-03-26 17:41:38 +00007021 if (value == 0)
7022 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007023 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7024 /*
7025 * So we are using non-UTF-8 buffers
7026 * Check that the char fit on 8bits, if not
7027 * generate a CharRef.
7028 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007029 if (value <= 0xFF) {
7030 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007031 out[1] = 0;
7032 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7033 (!ctxt->disableSAX))
7034 ctxt->sax->characters(ctxt->userData, out, 1);
7035 } else {
7036 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007037 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007038 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007039 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007040 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7041 (!ctxt->disableSAX))
7042 ctxt->sax->reference(ctxt->userData, out);
7043 }
7044 } else {
7045 /*
7046 * Just encode the value in UTF-8
7047 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007048 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007049 out[i] = 0;
7050 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7051 (!ctxt->disableSAX))
7052 ctxt->sax->characters(ctxt->userData, out, i);
7053 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007054 return;
7055 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007056
Daniel Veillard0161e632008-08-28 15:36:32 +00007057 /*
7058 * We are seeing an entity reference
7059 */
7060 ent = xmlParseEntityRef(ctxt);
7061 if (ent == NULL) return;
7062 if (!ctxt->wellFormed)
7063 return;
7064 was_checked = ent->checked;
7065
7066 /* special case of predefined entities */
7067 if ((ent->name == NULL) ||
7068 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7069 val = ent->content;
7070 if (val == NULL) return;
7071 /*
7072 * inline the entity.
7073 */
7074 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7075 (!ctxt->disableSAX))
7076 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7077 return;
7078 }
7079
7080 /*
7081 * The first reference to the entity trigger a parsing phase
7082 * where the ent->children is filled with the result from
7083 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007084 * Note: external parsed entities will not be loaded, it is not
7085 * required for a non-validating parser, unless the parsing option
7086 * of validating, or substituting entities were given. Doing so is
7087 * far more secure as the parser will only process data coming from
7088 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007089 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007090 if (((ent->checked == 0) ||
7091 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007092 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7093 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007094 unsigned long oldnbent = ctxt->nbentities;
7095
7096 /*
7097 * This is a bit hackish but this seems the best
7098 * way to make sure both SAX and DOM entity support
7099 * behaves okay.
7100 */
7101 void *user_data;
7102 if (ctxt->userData == ctxt)
7103 user_data = NULL;
7104 else
7105 user_data = ctxt->userData;
7106
7107 /*
7108 * Check that this entity is well formed
7109 * 4.3.2: An internal general parsed entity is well-formed
7110 * if its replacement text matches the production labeled
7111 * content.
7112 */
7113 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7114 ctxt->depth++;
7115 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7116 user_data, &list);
7117 ctxt->depth--;
7118
7119 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7120 ctxt->depth++;
7121 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7122 user_data, ctxt->depth, ent->URI,
7123 ent->ExternalID, &list);
7124 ctxt->depth--;
7125 } else {
7126 ret = XML_ERR_ENTITY_PE_INTERNAL;
7127 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7128 "invalid entity type found\n", NULL);
7129 }
7130
7131 /*
7132 * Store the number of entities needing parsing for this entity
7133 * content and do checkings
7134 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007135 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7136 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7137 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007138 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007139 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007140 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007141 return;
7142 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007143 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007144 xmlFreeNodeList(list);
7145 return;
7146 }
Owen Taylor3473f882001-02-23 17:55:21 +00007147
Daniel Veillard0161e632008-08-28 15:36:32 +00007148 if ((ret == XML_ERR_OK) && (list != NULL)) {
7149 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7150 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7151 (ent->children == NULL)) {
7152 ent->children = list;
7153 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007154 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007155 * Prune it directly in the generated document
7156 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007157 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007158 if (((list->type == XML_TEXT_NODE) &&
7159 (list->next == NULL)) ||
7160 (ctxt->parseMode == XML_PARSE_READER)) {
7161 list->parent = (xmlNodePtr) ent;
7162 list = NULL;
7163 ent->owner = 1;
7164 } else {
7165 ent->owner = 0;
7166 while (list != NULL) {
7167 list->parent = (xmlNodePtr) ctxt->node;
7168 list->doc = ctxt->myDoc;
7169 if (list->next == NULL)
7170 ent->last = list;
7171 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007172 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007173 list = ent->children;
7174#ifdef LIBXML_LEGACY_ENABLED
7175 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7176 xmlAddEntityReference(ent, list, NULL);
7177#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007178 }
7179 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007180 ent->owner = 1;
7181 while (list != NULL) {
7182 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007183 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007184 if (list->next == NULL)
7185 ent->last = list;
7186 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007187 }
7188 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007189 } else {
7190 xmlFreeNodeList(list);
7191 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007192 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007193 } else if ((ret != XML_ERR_OK) &&
7194 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7195 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7196 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007197 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007198 } else if (list != NULL) {
7199 xmlFreeNodeList(list);
7200 list = NULL;
7201 }
7202 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007203 ent->checked = 2;
David Kilzer3f0627a2017-06-16 21:30:42 +02007204
7205 /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7206 was_checked = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007207 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007208 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007209 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007210
Daniel Veillard0161e632008-08-28 15:36:32 +00007211 /*
7212 * Now that the entity content has been gathered
7213 * provide it to the application, this can take different forms based
7214 * on the parsing modes.
7215 */
7216 if (ent->children == NULL) {
7217 /*
7218 * Probably running in SAX mode and the callbacks don't
7219 * build the entity content. So unless we already went
7220 * though parsing for first checking go though the entity
7221 * content to generate callbacks associated to the entity
7222 */
7223 if (was_checked != 0) {
7224 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007225 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007226 * This is a bit hackish but this seems the best
7227 * way to make sure both SAX and DOM entity support
7228 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007229 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007230 if (ctxt->userData == ctxt)
7231 user_data = NULL;
7232 else
7233 user_data = ctxt->userData;
7234
7235 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7236 ctxt->depth++;
7237 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7238 ent->content, user_data, NULL);
7239 ctxt->depth--;
7240 } else if (ent->etype ==
7241 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7242 ctxt->depth++;
7243 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7244 ctxt->sax, user_data, ctxt->depth,
7245 ent->URI, ent->ExternalID, NULL);
7246 ctxt->depth--;
7247 } else {
7248 ret = XML_ERR_ENTITY_PE_INTERNAL;
7249 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7250 "invalid entity type found\n", NULL);
7251 }
7252 if (ret == XML_ERR_ENTITY_LOOP) {
7253 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7254 return;
7255 }
7256 }
7257 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7258 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7259 /*
7260 * Entity reference callback comes second, it's somewhat
7261 * superfluous but a compatibility to historical behaviour
7262 */
7263 ctxt->sax->reference(ctxt->userData, ent->name);
7264 }
7265 return;
7266 }
7267
7268 /*
7269 * If we didn't get any children for the entity being built
7270 */
7271 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7272 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7273 /*
7274 * Create a node.
7275 */
7276 ctxt->sax->reference(ctxt->userData, ent->name);
7277 return;
7278 }
7279
7280 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7281 /*
7282 * There is a problem on the handling of _private for entities
7283 * (bug 155816): Should we copy the content of the field from
7284 * the entity (possibly overwriting some value set by the user
7285 * when a copy is created), should we leave it alone, or should
7286 * we try to take care of different situations? The problem
7287 * is exacerbated by the usage of this field by the xmlReader.
7288 * To fix this bug, we look at _private on the created node
7289 * and, if it's NULL, we copy in whatever was in the entity.
7290 * If it's not NULL we leave it alone. This is somewhat of a
7291 * hack - maybe we should have further tests to determine
7292 * what to do.
7293 */
7294 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7295 /*
7296 * Seems we are generating the DOM content, do
7297 * a simple tree copy for all references except the first
7298 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007299 */
7300 if (((list == NULL) && (ent->owner == 0)) ||
7301 (ctxt->parseMode == XML_PARSE_READER)) {
7302 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7303
7304 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007305 * We are copying here, make sure there is no abuse
7306 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007307 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007308 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7309 return;
7310
7311 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007312 * when operating on a reader, the entities definitions
7313 * are always owning the entities subtree.
7314 if (ctxt->parseMode == XML_PARSE_READER)
7315 ent->owner = 1;
7316 */
7317
7318 cur = ent->children;
7319 while (cur != NULL) {
7320 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7321 if (nw != NULL) {
7322 if (nw->_private == NULL)
7323 nw->_private = cur->_private;
7324 if (firstChild == NULL){
7325 firstChild = nw;
7326 }
7327 nw = xmlAddChild(ctxt->node, nw);
7328 }
7329 if (cur == ent->last) {
7330 /*
7331 * needed to detect some strange empty
7332 * node cases in the reader tests
7333 */
7334 if ((ctxt->parseMode == XML_PARSE_READER) &&
7335 (nw != NULL) &&
7336 (nw->type == XML_ELEMENT_NODE) &&
7337 (nw->children == NULL))
7338 nw->extra = 1;
7339
7340 break;
7341 }
7342 cur = cur->next;
7343 }
7344#ifdef LIBXML_LEGACY_ENABLED
7345 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7346 xmlAddEntityReference(ent, firstChild, nw);
7347#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007348 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007349 xmlNodePtr nw = NULL, cur, next, last,
7350 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007351
7352 /*
7353 * We are copying here, make sure there is no abuse
7354 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007355 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007356 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7357 return;
7358
Daniel Veillard0161e632008-08-28 15:36:32 +00007359 /*
7360 * Copy the entity child list and make it the new
7361 * entity child list. The goal is to make sure any
7362 * ID or REF referenced will be the one from the
7363 * document content and not the entity copy.
7364 */
7365 cur = ent->children;
7366 ent->children = NULL;
7367 last = ent->last;
7368 ent->last = NULL;
7369 while (cur != NULL) {
7370 next = cur->next;
7371 cur->next = NULL;
7372 cur->parent = NULL;
7373 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7374 if (nw != NULL) {
7375 if (nw->_private == NULL)
7376 nw->_private = cur->_private;
7377 if (firstChild == NULL){
7378 firstChild = cur;
7379 }
7380 xmlAddChild((xmlNodePtr) ent, nw);
7381 xmlAddChild(ctxt->node, cur);
7382 }
7383 if (cur == last)
7384 break;
7385 cur = next;
7386 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007387 if (ent->owner == 0)
7388 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007389#ifdef LIBXML_LEGACY_ENABLED
7390 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7391 xmlAddEntityReference(ent, firstChild, nw);
7392#endif /* LIBXML_LEGACY_ENABLED */
7393 } else {
7394 const xmlChar *nbktext;
7395
7396 /*
7397 * the name change is to avoid coalescing of the
7398 * node with a possible previous text one which
7399 * would make ent->children a dangling pointer
7400 */
7401 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7402 -1);
7403 if (ent->children->type == XML_TEXT_NODE)
7404 ent->children->name = nbktext;
7405 if ((ent->last != ent->children) &&
7406 (ent->last->type == XML_TEXT_NODE))
7407 ent->last->name = nbktext;
7408 xmlAddChildList(ctxt->node, ent->children);
7409 }
7410
7411 /*
7412 * This is to avoid a nasty side effect, see
7413 * characters() in SAX.c
7414 */
7415 ctxt->nodemem = 0;
7416 ctxt->nodelen = 0;
7417 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007418 }
7419 }
7420}
7421
7422/**
7423 * xmlParseEntityRef:
7424 * @ctxt: an XML parser context
7425 *
7426 * parse ENTITY references declarations
7427 *
7428 * [68] EntityRef ::= '&' Name ';'
7429 *
7430 * [ WFC: Entity Declared ]
7431 * In a document without any DTD, a document with only an internal DTD
7432 * subset which contains no parameter entity references, or a document
7433 * with "standalone='yes'", the Name given in the entity reference
7434 * must match that in an entity declaration, except that well-formed
7435 * documents need not declare any of the following entities: amp, lt,
7436 * gt, apos, quot. The declaration of a parameter entity must precede
7437 * any reference to it. Similarly, the declaration of a general entity
7438 * must precede any reference to it which appears in a default value in an
7439 * attribute-list declaration. Note that if entities are declared in the
7440 * external subset or in external parameter entities, a non-validating
7441 * processor is not obligated to read and process their declarations;
7442 * for such documents, the rule that an entity must be declared is a
7443 * well-formedness constraint only if standalone='yes'.
7444 *
7445 * [ WFC: Parsed Entity ]
7446 * An entity reference must not contain the name of an unparsed entity
7447 *
7448 * Returns the xmlEntityPtr if found, or NULL otherwise.
7449 */
7450xmlEntityPtr
7451xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007452 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007453 xmlEntityPtr ent = NULL;
7454
7455 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007456 if (ctxt->instate == XML_PARSER_EOF)
7457 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007458
Daniel Veillard0161e632008-08-28 15:36:32 +00007459 if (RAW != '&')
7460 return(NULL);
7461 NEXT;
7462 name = xmlParseName(ctxt);
7463 if (name == NULL) {
7464 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7465 "xmlParseEntityRef: no name\n");
7466 return(NULL);
7467 }
7468 if (RAW != ';') {
7469 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7470 return(NULL);
7471 }
7472 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007473
Daniel Veillard0161e632008-08-28 15:36:32 +00007474 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007475 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007476 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007477 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7478 ent = xmlGetPredefinedEntity(name);
7479 if (ent != NULL)
7480 return(ent);
7481 }
Owen Taylor3473f882001-02-23 17:55:21 +00007482
Daniel Veillard0161e632008-08-28 15:36:32 +00007483 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007484 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007485 */
7486 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007487
Daniel Veillard0161e632008-08-28 15:36:32 +00007488 /*
7489 * Ask first SAX for entity resolution, otherwise try the
7490 * entities which may have stored in the parser context.
7491 */
7492 if (ctxt->sax != NULL) {
7493 if (ctxt->sax->getEntity != NULL)
7494 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007495 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007496 (ctxt->options & XML_PARSE_OLDSAX))
7497 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007498 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7499 (ctxt->userData==ctxt)) {
7500 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007501 }
7502 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007503 if (ctxt->instate == XML_PARSER_EOF)
7504 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007505 /*
7506 * [ WFC: Entity Declared ]
7507 * In a document without any DTD, a document with only an
7508 * internal DTD subset which contains no parameter entity
7509 * references, or a document with "standalone='yes'", the
7510 * Name given in the entity reference must match that in an
7511 * entity declaration, except that well-formed documents
7512 * need not declare any of the following entities: amp, lt,
7513 * gt, apos, quot.
7514 * The declaration of a parameter entity must precede any
7515 * reference to it.
7516 * Similarly, the declaration of a general entity must
7517 * precede any reference to it which appears in a default
7518 * value in an attribute-list declaration. Note that if
7519 * entities are declared in the external subset or in
7520 * external parameter entities, a non-validating processor
7521 * is not obligated to read and process their declarations;
7522 * for such documents, the rule that an entity must be
7523 * declared is a well-formedness constraint only if
7524 * standalone='yes'.
7525 */
7526 if (ent == NULL) {
7527 if ((ctxt->standalone == 1) ||
7528 ((ctxt->hasExternalSubset == 0) &&
7529 (ctxt->hasPErefs == 0))) {
7530 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7531 "Entity '%s' not defined\n", name);
7532 } else {
7533 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7534 "Entity '%s' not defined\n", name);
7535 if ((ctxt->inSubset == 0) &&
7536 (ctxt->sax != NULL) &&
7537 (ctxt->sax->reference != NULL)) {
7538 ctxt->sax->reference(ctxt->userData, name);
7539 }
7540 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007541 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007542 ctxt->valid = 0;
7543 }
7544
7545 /*
7546 * [ WFC: Parsed Entity ]
7547 * An entity reference must not contain the name of an
7548 * unparsed entity
7549 */
7550 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7551 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7552 "Entity reference to unparsed entity %s\n", name);
7553 }
7554
7555 /*
7556 * [ WFC: No External Entity References ]
7557 * Attribute values cannot contain direct or indirect
7558 * entity references to external entities.
7559 */
7560 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7561 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7562 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7563 "Attribute references external entity '%s'\n", name);
7564 }
7565 /*
7566 * [ WFC: No < in Attribute Values ]
7567 * The replacement text of any entity referred to directly or
7568 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007569 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007570 */
7571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007572 (ent != NULL) &&
7573 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007574 if (((ent->checked & 1) || (ent->checked == 0)) &&
7575 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007576 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7577 "'<' in entity '%s' is not allowed in attributes values\n", name);
7578 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007579 }
7580
7581 /*
7582 * Internal check, no parameter entities here ...
7583 */
7584 else {
7585 switch (ent->etype) {
7586 case XML_INTERNAL_PARAMETER_ENTITY:
7587 case XML_EXTERNAL_PARAMETER_ENTITY:
7588 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7589 "Attempt to reference the parameter entity '%s'\n",
7590 name);
7591 break;
7592 default:
7593 break;
7594 }
7595 }
7596
7597 /*
7598 * [ WFC: No Recursion ]
7599 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007600 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007601 * Done somewhere else
7602 */
Owen Taylor3473f882001-02-23 17:55:21 +00007603 return(ent);
7604}
7605
7606/**
7607 * xmlParseStringEntityRef:
7608 * @ctxt: an XML parser context
7609 * @str: a pointer to an index in the string
7610 *
7611 * parse ENTITY references declarations, but this version parses it from
7612 * a string value.
7613 *
7614 * [68] EntityRef ::= '&' Name ';'
7615 *
7616 * [ WFC: Entity Declared ]
7617 * In a document without any DTD, a document with only an internal DTD
7618 * subset which contains no parameter entity references, or a document
7619 * with "standalone='yes'", the Name given in the entity reference
7620 * must match that in an entity declaration, except that well-formed
7621 * documents need not declare any of the following entities: amp, lt,
7622 * gt, apos, quot. The declaration of a parameter entity must precede
7623 * any reference to it. Similarly, the declaration of a general entity
7624 * must precede any reference to it which appears in a default value in an
7625 * attribute-list declaration. Note that if entities are declared in the
7626 * external subset or in external parameter entities, a non-validating
7627 * processor is not obligated to read and process their declarations;
7628 * for such documents, the rule that an entity must be declared is a
7629 * well-formedness constraint only if standalone='yes'.
7630 *
7631 * [ WFC: Parsed Entity ]
7632 * An entity reference must not contain the name of an unparsed entity
7633 *
7634 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7635 * is updated to the current location in the string.
7636 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007637static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007638xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7639 xmlChar *name;
7640 const xmlChar *ptr;
7641 xmlChar cur;
7642 xmlEntityPtr ent = NULL;
7643
7644 if ((str == NULL) || (*str == NULL))
7645 return(NULL);
7646 ptr = *str;
7647 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007648 if (cur != '&')
7649 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007650
Daniel Veillard0161e632008-08-28 15:36:32 +00007651 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007652 name = xmlParseStringName(ctxt, &ptr);
7653 if (name == NULL) {
7654 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7655 "xmlParseStringEntityRef: no name\n");
7656 *str = ptr;
7657 return(NULL);
7658 }
7659 if (*ptr != ';') {
7660 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007661 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007662 *str = ptr;
7663 return(NULL);
7664 }
7665 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007666
Owen Taylor3473f882001-02-23 17:55:21 +00007667
Daniel Veillard0161e632008-08-28 15:36:32 +00007668 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007669 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007670 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007671 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7672 ent = xmlGetPredefinedEntity(name);
7673 if (ent != NULL) {
7674 xmlFree(name);
7675 *str = ptr;
7676 return(ent);
7677 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007678 }
Owen Taylor3473f882001-02-23 17:55:21 +00007679
Daniel Veillard0161e632008-08-28 15:36:32 +00007680 /*
7681 * Increate the number of entity references parsed
7682 */
7683 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007684
Daniel Veillard0161e632008-08-28 15:36:32 +00007685 /*
7686 * Ask first SAX for entity resolution, otherwise try the
7687 * entities which may have stored in the parser context.
7688 */
7689 if (ctxt->sax != NULL) {
7690 if (ctxt->sax->getEntity != NULL)
7691 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007692 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7693 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007694 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7695 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007696 }
7697 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007698 if (ctxt->instate == XML_PARSER_EOF) {
7699 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007700 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007701 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007702
7703 /*
7704 * [ WFC: Entity Declared ]
7705 * In a document without any DTD, a document with only an
7706 * internal DTD subset which contains no parameter entity
7707 * references, or a document with "standalone='yes'", the
7708 * Name given in the entity reference must match that in an
7709 * entity declaration, except that well-formed documents
7710 * need not declare any of the following entities: amp, lt,
7711 * gt, apos, quot.
7712 * The declaration of a parameter entity must precede any
7713 * reference to it.
7714 * Similarly, the declaration of a general entity must
7715 * precede any reference to it which appears in a default
7716 * value in an attribute-list declaration. Note that if
7717 * entities are declared in the external subset or in
7718 * external parameter entities, a non-validating processor
7719 * is not obligated to read and process their declarations;
7720 * for such documents, the rule that an entity must be
7721 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007722 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007723 */
7724 if (ent == NULL) {
7725 if ((ctxt->standalone == 1) ||
7726 ((ctxt->hasExternalSubset == 0) &&
7727 (ctxt->hasPErefs == 0))) {
7728 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7729 "Entity '%s' not defined\n", name);
7730 } else {
7731 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7732 "Entity '%s' not defined\n",
7733 name);
7734 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007735 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007736 /* TODO ? check regressions ctxt->valid = 0; */
7737 }
7738
7739 /*
7740 * [ WFC: Parsed Entity ]
7741 * An entity reference must not contain the name of an
7742 * unparsed entity
7743 */
7744 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7745 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7746 "Entity reference to unparsed entity %s\n", name);
7747 }
7748
7749 /*
7750 * [ WFC: No External Entity References ]
7751 * Attribute values cannot contain direct or indirect
7752 * entity references to external entities.
7753 */
7754 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7755 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7756 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7757 "Attribute references external entity '%s'\n", name);
7758 }
7759 /*
7760 * [ WFC: No < in Attribute Values ]
7761 * The replacement text of any entity referred to directly or
7762 * indirectly in an attribute value (other than "&lt;") must
7763 * not contain a <.
7764 */
7765 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7766 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007767 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007768 (xmlStrchr(ent->content, '<'))) {
7769 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7770 "'<' in entity '%s' is not allowed in attributes values\n",
7771 name);
7772 }
7773
7774 /*
7775 * Internal check, no parameter entities here ...
7776 */
7777 else {
7778 switch (ent->etype) {
7779 case XML_INTERNAL_PARAMETER_ENTITY:
7780 case XML_EXTERNAL_PARAMETER_ENTITY:
7781 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7782 "Attempt to reference the parameter entity '%s'\n",
7783 name);
7784 break;
7785 default:
7786 break;
7787 }
7788 }
7789
7790 /*
7791 * [ WFC: No Recursion ]
7792 * A parsed entity must not contain a recursive reference
7793 * to itself, either directly or indirectly.
7794 * Done somewhere else
7795 */
7796
7797 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007798 *str = ptr;
7799 return(ent);
7800}
7801
7802/**
7803 * xmlParsePEReference:
7804 * @ctxt: an XML parser context
7805 *
7806 * parse PEReference declarations
7807 * The entity content is handled directly by pushing it's content as
7808 * a new input stream.
7809 *
7810 * [69] PEReference ::= '%' Name ';'
7811 *
7812 * [ WFC: No Recursion ]
7813 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007814 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007815 *
7816 * [ WFC: Entity Declared ]
7817 * In a document without any DTD, a document with only an internal DTD
7818 * subset which contains no parameter entity references, or a document
7819 * with "standalone='yes'", ... ... The declaration of a parameter
7820 * entity must precede any reference to it...
7821 *
7822 * [ VC: Entity Declared ]
7823 * In a document with an external subset or external parameter entities
7824 * with "standalone='no'", ... ... The declaration of a parameter entity
7825 * must precede any reference to it...
7826 *
7827 * [ WFC: In DTD ]
7828 * Parameter-entity references may only appear in the DTD.
7829 * NOTE: misleading but this is handled.
7830 */
7831void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007832xmlParsePEReference(xmlParserCtxtPtr ctxt)
7833{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007834 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007835 xmlEntityPtr entity = NULL;
7836 xmlParserInputPtr input;
7837
Daniel Veillard0161e632008-08-28 15:36:32 +00007838 if (RAW != '%')
7839 return;
7840 NEXT;
7841 name = xmlParseName(ctxt);
7842 if (name == NULL) {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007843 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
Daniel Veillard0161e632008-08-28 15:36:32 +00007844 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007845 }
Nick Wellnhofer03904152017-06-05 21:16:00 +02007846 if (xmlParserDebugEntities)
7847 xmlGenericError(xmlGenericErrorContext,
7848 "PEReference: %s\n", name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007849 if (RAW != ';') {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007850 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007851 return;
7852 }
7853
7854 NEXT;
7855
7856 /*
7857 * Increate the number of entity references parsed
7858 */
7859 ctxt->nbentities++;
7860
7861 /*
7862 * Request the entity from SAX
7863 */
7864 if ((ctxt->sax != NULL) &&
7865 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007866 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7867 if (ctxt->instate == XML_PARSER_EOF)
7868 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007869 if (entity == NULL) {
7870 /*
7871 * [ WFC: Entity Declared ]
7872 * In a document without any DTD, a document with only an
7873 * internal DTD subset which contains no parameter entity
7874 * references, or a document with "standalone='yes'", ...
7875 * ... The declaration of a parameter entity must precede
7876 * any reference to it...
7877 */
7878 if ((ctxt->standalone == 1) ||
7879 ((ctxt->hasExternalSubset == 0) &&
7880 (ctxt->hasPErefs == 0))) {
7881 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7882 "PEReference: %%%s; not found\n",
7883 name);
7884 } else {
7885 /*
7886 * [ VC: Entity Declared ]
7887 * In a document with an external subset or external
7888 * parameter entities with "standalone='no'", ...
7889 * ... The declaration of a parameter entity must
7890 * precede any reference to it...
7891 */
Nick Wellnhofer03904152017-06-05 21:16:00 +02007892 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7893 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7894 "PEReference: %%%s; not found\n",
7895 name, NULL);
7896 } else
7897 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7898 "PEReference: %%%s; not found\n",
7899 name, NULL);
7900 ctxt->valid = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +00007901 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007902 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007903 } else {
7904 /*
7905 * Internal checking in case the entity quest barfed
7906 */
7907 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7908 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7909 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7910 "Internal: %%%s; is not a parameter entity\n",
7911 name, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007912 } else {
Nick Wellnhofer03904152017-06-05 21:16:00 +02007913 xmlChar start[4];
7914 xmlCharEncoding enc;
7915
Neel Mehta90ccb582017-04-07 17:43:02 +02007916 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7917 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7918 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7919 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7920 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7921 (ctxt->replaceEntities == 0) &&
7922 (ctxt->validate == 0))
7923 return;
7924
Daniel Veillard0161e632008-08-28 15:36:32 +00007925 input = xmlNewEntityInputStream(ctxt, entity);
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007926 if (xmlPushInput(ctxt, input) < 0) {
7927 xmlFreeInputStream(input);
Daniel Veillard0161e632008-08-28 15:36:32 +00007928 return;
Nick Wellnhofer899a5d92017-07-25 14:59:49 +02007929 }
Nick Wellnhofer46dc9892017-06-08 02:24:56 +02007930
7931 if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7932 /*
7933 * Get the 4 first bytes and decode the charset
7934 * if enc != XML_CHAR_ENCODING_NONE
7935 * plug some encoding conversion routines.
7936 * Note that, since we may have some non-UTF8
7937 * encoding (like UTF16, bug 135229), the 'length'
7938 * is not known, but we can calculate based upon
7939 * the amount of data in the buffer.
7940 */
7941 GROW
7942 if (ctxt->instate == XML_PARSER_EOF)
7943 return;
7944 if ((ctxt->input->end - ctxt->input->cur)>=4) {
7945 start[0] = RAW;
7946 start[1] = NXT(1);
7947 start[2] = NXT(2);
7948 start[3] = NXT(3);
7949 enc = xmlDetectCharEncoding(start, 4);
7950 if (enc != XML_CHAR_ENCODING_NONE) {
7951 xmlSwitchEncoding(ctxt, enc);
7952 }
7953 }
7954
7955 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7956 (IS_BLANK_CH(NXT(5)))) {
7957 xmlParseTextDecl(ctxt);
Nick Wellnhofer03904152017-06-05 21:16:00 +02007958 }
7959 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007960 }
7961 }
7962 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007963}
7964
7965/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007966 * xmlLoadEntityContent:
7967 * @ctxt: an XML parser context
7968 * @entity: an unloaded system entity
7969 *
7970 * Load the original content of the given system entity from the
7971 * ExternalID/SystemID given. This is to be used for Included in Literal
7972 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7973 *
7974 * Returns 0 in case of success and -1 in case of failure
7975 */
7976static int
7977xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7978 xmlParserInputPtr input;
7979 xmlBufferPtr buf;
7980 int l, c;
7981 int count = 0;
7982
7983 if ((ctxt == NULL) || (entity == NULL) ||
7984 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7985 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7986 (entity->content != NULL)) {
7987 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7988 "xmlLoadEntityContent parameter error");
7989 return(-1);
7990 }
7991
7992 if (xmlParserDebugEntities)
7993 xmlGenericError(xmlGenericErrorContext,
7994 "Reading %s entity content input\n", entity->name);
7995
7996 buf = xmlBufferCreate();
7997 if (buf == NULL) {
7998 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7999 "xmlLoadEntityContent parameter error");
8000 return(-1);
8001 }
8002
8003 input = xmlNewEntityInputStream(ctxt, entity);
8004 if (input == NULL) {
8005 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8006 "xmlLoadEntityContent input error");
8007 xmlBufferFree(buf);
8008 return(-1);
8009 }
8010
8011 /*
8012 * Push the entity as the current input, read char by char
8013 * saving to the buffer until the end of the entity or an error
8014 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008015 if (xmlPushInput(ctxt, input) < 0) {
8016 xmlBufferFree(buf);
8017 return(-1);
8018 }
8019
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008020 GROW;
8021 c = CUR_CHAR(l);
8022 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8023 (IS_CHAR(c))) {
8024 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008025 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008026 count = 0;
8027 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008028 if (ctxt->instate == XML_PARSER_EOF) {
8029 xmlBufferFree(buf);
8030 return(-1);
8031 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008032 }
8033 NEXTL(l);
8034 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008035 if (c == 0) {
8036 count = 0;
8037 GROW;
8038 if (ctxt->instate == XML_PARSER_EOF) {
8039 xmlBufferFree(buf);
8040 return(-1);
8041 }
8042 c = CUR_CHAR(l);
8043 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008044 }
8045
8046 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8047 xmlPopInput(ctxt);
8048 } else if (!IS_CHAR(c)) {
8049 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8050 "xmlLoadEntityContent: invalid char value %d\n",
8051 c);
8052 xmlBufferFree(buf);
8053 return(-1);
8054 }
8055 entity->content = buf->content;
8056 buf->content = NULL;
8057 xmlBufferFree(buf);
8058
8059 return(0);
8060}
8061
8062/**
Owen Taylor3473f882001-02-23 17:55:21 +00008063 * xmlParseStringPEReference:
8064 * @ctxt: an XML parser context
8065 * @str: a pointer to an index in the string
8066 *
8067 * parse PEReference declarations
8068 *
8069 * [69] PEReference ::= '%' Name ';'
8070 *
8071 * [ WFC: No Recursion ]
8072 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008073 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008074 *
8075 * [ WFC: Entity Declared ]
8076 * In a document without any DTD, a document with only an internal DTD
8077 * subset which contains no parameter entity references, or a document
8078 * with "standalone='yes'", ... ... The declaration of a parameter
8079 * entity must precede any reference to it...
8080 *
8081 * [ VC: Entity Declared ]
8082 * In a document with an external subset or external parameter entities
8083 * with "standalone='no'", ... ... The declaration of a parameter entity
8084 * must precede any reference to it...
8085 *
8086 * [ WFC: In DTD ]
8087 * Parameter-entity references may only appear in the DTD.
8088 * NOTE: misleading but this is handled.
8089 *
 * Returns the referenced parameter entity as an xmlEntityPtr, or NULL
 * if none could be obtained.
8091 * str is updated to the current value of the index
8092 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008093static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008094xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8095 const xmlChar *ptr;
8096 xmlChar cur;
8097 xmlChar *name;
8098 xmlEntityPtr entity = NULL;
8099
8100 if ((str == NULL) || (*str == NULL)) return(NULL);
8101 ptr = *str;
8102 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008103 if (cur != '%')
8104 return(NULL);
8105 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008106 name = xmlParseStringName(ctxt, &ptr);
8107 if (name == NULL) {
8108 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8109 "xmlParseStringPEReference: no name\n");
8110 *str = ptr;
8111 return(NULL);
8112 }
8113 cur = *ptr;
8114 if (cur != ';') {
8115 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8116 xmlFree(name);
8117 *str = ptr;
8118 return(NULL);
8119 }
8120 ptr++;
8121
8122 /*
8123 * Increate the number of entity references parsed
8124 */
8125 ctxt->nbentities++;
8126
8127 /*
8128 * Request the entity from SAX
8129 */
8130 if ((ctxt->sax != NULL) &&
8131 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008132 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8133 if (ctxt->instate == XML_PARSER_EOF) {
8134 xmlFree(name);
Nick Wellnhoferfb2f5182017-06-10 17:06:16 +02008135 *str = ptr;
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008136 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008137 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008138 if (entity == NULL) {
8139 /*
8140 * [ WFC: Entity Declared ]
8141 * In a document without any DTD, a document with only an
8142 * internal DTD subset which contains no parameter entity
8143 * references, or a document with "standalone='yes'", ...
8144 * ... The declaration of a parameter entity must precede
8145 * any reference to it...
8146 */
8147 if ((ctxt->standalone == 1) ||
8148 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8149 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8150 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008151 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008152 /*
8153 * [ VC: Entity Declared ]
8154 * In a document with an external subset or external
8155 * parameter entities with "standalone='no'", ...
8156 * ... The declaration of a parameter entity must
8157 * precede any reference to it...
8158 */
8159 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8160 "PEReference: %%%s; not found\n",
8161 name, NULL);
8162 ctxt->valid = 0;
8163 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008164 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008165 } else {
8166 /*
8167 * Internal checking in case the entity quest barfed
8168 */
8169 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8170 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8171 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8172 "%%%s; is not a parameter entity\n",
8173 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008174 }
8175 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008176 ctxt->hasPErefs = 1;
8177 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008178 *str = ptr;
8179 return(entity);
8180}
8181
8182/**
8183 * xmlParseDocTypeDecl:
8184 * @ctxt: an XML parser context
8185 *
8186 * parse a DOCTYPE declaration
8187 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008188 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008189 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8190 *
8191 * [ VC: Root Element Type ]
8192 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008193 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008194 */
8195
8196void
8197xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008198 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008199 xmlChar *ExternalID = NULL;
8200 xmlChar *URI = NULL;
8201
8202 /*
8203 * We know that '<!DOCTYPE' has been detected.
8204 */
8205 SKIP(9);
8206
8207 SKIP_BLANKS;
8208
8209 /*
8210 * Parse the DOCTYPE name.
8211 */
8212 name = xmlParseName(ctxt);
8213 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008214 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8215 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008216 }
8217 ctxt->intSubName = name;
8218
8219 SKIP_BLANKS;
8220
8221 /*
8222 * Check for SystemID and ExternalID
8223 */
8224 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8225
8226 if ((URI != NULL) || (ExternalID != NULL)) {
8227 ctxt->hasExternalSubset = 1;
8228 }
8229 ctxt->extSubURI = URI;
8230 ctxt->extSubSystem = ExternalID;
8231
8232 SKIP_BLANKS;
8233
8234 /*
8235 * Create and update the internal subset.
8236 */
8237 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8238 (!ctxt->disableSAX))
8239 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008240 if (ctxt->instate == XML_PARSER_EOF)
8241 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008242
8243 /*
8244 * Is there any internal subset declarations ?
8245 * they are handled separately in xmlParseInternalSubset()
8246 */
8247 if (RAW == '[')
8248 return;
8249
8250 /*
8251 * We should be at the end of the DOCTYPE declaration.
8252 */
8253 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008254 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008255 }
8256 NEXT;
8257}
8258
8259/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008260 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008261 * @ctxt: an XML parser context
8262 *
8263 * parse the internal subset declaration
8264 *
8265 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8266 */
8267
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008268static void
Owen Taylor3473f882001-02-23 17:55:21 +00008269xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8270 /*
8271 * Is there any DTD definition ?
8272 */
8273 if (RAW == '[') {
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008274 int baseInputNr = ctxt->inputNr;
Owen Taylor3473f882001-02-23 17:55:21 +00008275 ctxt->instate = XML_PARSER_DTD;
8276 NEXT;
8277 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008278 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008279 * PEReferences.
8280 * Subsequence (markupdecl | PEReference | S)*
8281 */
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008282 while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
Nick Wellnhofer453dff12017-06-19 17:55:20 +02008283 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008284 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008285 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008286
8287 SKIP_BLANKS;
8288 xmlParseMarkupDecl(ctxt);
8289 xmlParsePEReference(ctxt);
8290
Owen Taylor3473f882001-02-23 17:55:21 +00008291 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008292 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008293 "xmlParseInternalSubset: error detected in Markup declaration\n");
Nick Wellnhoferb90d8982017-09-19 15:45:35 +02008294 if (ctxt->inputNr > baseInputNr)
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02008295 xmlPopInput(ctxt);
8296 else
8297 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008298 }
8299 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008300 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008301 NEXT;
8302 SKIP_BLANKS;
8303 }
8304 }
8305
8306 /*
8307 * We should be at the end of the DOCTYPE declaration.
8308 */
8309 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008310 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008311 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008312 }
8313 NEXT;
8314}
8315
Daniel Veillard81273902003-09-30 00:43:48 +00008316#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008317/**
8318 * xmlParseAttribute:
8319 * @ctxt: an XML parser context
8320 * @value: a xmlChar ** used to store the value of the attribute
8321 *
8322 * parse an attribute
8323 *
8324 * [41] Attribute ::= Name Eq AttValue
8325 *
8326 * [ WFC: No External Entity References ]
8327 * Attribute values cannot contain direct or indirect entity references
8328 * to external entities.
8329 *
8330 * [ WFC: No < in Attribute Values ]
8331 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008332 * an attribute value (other than "&lt;") must not contain a <.
8333 *
Owen Taylor3473f882001-02-23 17:55:21 +00008334 * [ VC: Attribute Value Type ]
8335 * The attribute must have been declared; the value must be of the type
8336 * declared for it.
8337 *
8338 * [25] Eq ::= S? '=' S?
8339 *
8340 * With namespace:
8341 *
8342 * [NS 11] Attribute ::= QName Eq AttValue
8343 *
8344 * Also the case QName == xmlns:??? is handled independently as a namespace
8345 * definition.
8346 *
8347 * Returns the attribute name, and the value in *value.
8348 */
8349
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008350const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008351xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008352 const xmlChar *name;
8353 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008354
8355 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008356 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008357 name = xmlParseName(ctxt);
8358 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008359 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008360 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008361 return(NULL);
8362 }
8363
8364 /*
8365 * read the value
8366 */
8367 SKIP_BLANKS;
8368 if (RAW == '=') {
8369 NEXT;
8370 SKIP_BLANKS;
8371 val = xmlParseAttValue(ctxt);
8372 ctxt->instate = XML_PARSER_CONTENT;
8373 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008374 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02008375 "Specification mandates value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008376 return(NULL);
8377 }
8378
8379 /*
8380 * Check that xml:lang conforms to the specification
8381 * No more registered as an error, just generate a warning now
8382 * since this was deprecated in XML second edition
8383 */
8384 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8385 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008386 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8387 "Malformed value for xml:lang : %s\n",
8388 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008389 }
8390 }
8391
8392 /*
8393 * Check that xml:space conforms to the specification
8394 */
8395 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8396 if (xmlStrEqual(val, BAD_CAST "default"))
8397 *(ctxt->space) = 0;
8398 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8399 *(ctxt->space) = 1;
8400 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008401 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008402"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008403 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008404 }
8405 }
8406
8407 *value = val;
8408 return(name);
8409}
8410
8411/**
8412 * xmlParseStartTag:
8413 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008414 *
Owen Taylor3473f882001-02-23 17:55:21 +00008415 * parse a start of tag either for rule element or
8416 * EmptyElement. In both case we don't parse the tag closing chars.
8417 *
8418 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8419 *
8420 * [ WFC: Unique Att Spec ]
8421 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008422 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008423 *
8424 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8425 *
8426 * [ WFC: Unique Att Spec ]
8427 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008428 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008429 *
8430 * With namespace:
8431 *
8432 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8433 *
8434 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8435 *
8436 * Returns the element name parsed
8437 */
8438
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008439const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008440xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008441 const xmlChar *name;
8442 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008443 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008444 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008445 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008446 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008447 int i;
8448
8449 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008450 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008451
8452 name = xmlParseName(ctxt);
8453 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008454 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008455 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008456 return(NULL);
8457 }
8458
8459 /*
8460 * Now parse the attributes, it ends up with the ending
8461 *
8462 * (S Attribute)* S?
8463 */
8464 SKIP_BLANKS;
8465 GROW;
8466
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008467 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008468 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008469 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008470 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008471 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008472
8473 attname = xmlParseAttribute(ctxt, &attvalue);
8474 if ((attname != NULL) && (attvalue != NULL)) {
8475 /*
8476 * [ WFC: Unique Att Spec ]
8477 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008478 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008479 */
8480 for (i = 0; i < nbatts;i += 2) {
8481 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008482 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008483 xmlFree(attvalue);
8484 goto failed;
8485 }
8486 }
Owen Taylor3473f882001-02-23 17:55:21 +00008487 /*
8488 * Add the pair to atts
8489 */
8490 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008491 maxatts = 22; /* allow for 10 attrs by default */
8492 atts = (const xmlChar **)
8493 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008494 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008495 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008496 if (attvalue != NULL)
8497 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008498 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008499 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008500 ctxt->atts = atts;
8501 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008502 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008503 const xmlChar **n;
8504
Owen Taylor3473f882001-02-23 17:55:21 +00008505 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008506 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008507 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008508 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008509 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008510 if (attvalue != NULL)
8511 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008512 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008513 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008514 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008515 ctxt->atts = atts;
8516 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008517 }
8518 atts[nbatts++] = attname;
8519 atts[nbatts++] = attvalue;
8520 atts[nbatts] = NULL;
8521 atts[nbatts + 1] = NULL;
8522 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008523 if (attvalue != NULL)
8524 xmlFree(attvalue);
8525 }
8526
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008527failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008528
Daniel Veillard3772de32002-12-17 10:31:45 +00008529 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008530 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8531 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02008532 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008533 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8534 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008535 }
Daniel Veillard02111c12003-02-24 19:14:52 +00008536 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8537 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008538 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8539 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008540 break;
8541 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008542 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008543 GROW;
8544 }
8545
8546 /*
8547 * SAX: Start of Element !
8548 */
8549 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008550 (!ctxt->disableSAX)) {
8551 if (nbatts > 0)
8552 ctxt->sax->startElement(ctxt->userData, name, atts);
8553 else
8554 ctxt->sax->startElement(ctxt->userData, name, NULL);
8555 }
Owen Taylor3473f882001-02-23 17:55:21 +00008556
8557 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008558 /* Free only the content strings */
8559 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008560 if (atts[i] != NULL)
8561 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008562 }
8563 return(name);
8564}
8565
8566/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008567 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008568 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008569 * @line: line of the start tag
8570 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008571 *
8572 * parse an end of tag
8573 *
8574 * [42] ETag ::= '</' Name S? '>'
8575 *
8576 * With namespace
8577 *
8578 * [NS 9] ETag ::= '</' QName S? '>'
8579 */
8580
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008581static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008582xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008583 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008584
8585 GROW;
8586 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008587 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008588 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008589 return;
8590 }
8591 SKIP(2);
8592
Daniel Veillard46de64e2002-05-29 08:21:33 +00008593 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008594
8595 /*
8596 * We should definitely be at the ending "S? '>'" part
8597 */
8598 GROW;
8599 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008600 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008601 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008602 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008603 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008604
8605 /*
8606 * [ WFC: Element Type Match ]
8607 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008608 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008609 *
8610 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008611 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008612 if (name == NULL) name = BAD_CAST "unparseable";
8613 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008614 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008615 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008616 }
8617
8618 /*
8619 * SAX: End of Tag
8620 */
8621 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8622 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008623 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008624
Daniel Veillarde57ec792003-09-10 10:50:59 +00008625 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008626 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008627 return;
8628}
8629
8630/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008631 * xmlParseEndTag:
8632 * @ctxt: an XML parser context
8633 *
8634 * parse an end of tag
8635 *
8636 * [42] ETag ::= '</' Name S? '>'
8637 *
8638 * With namespace
8639 *
8640 * [NS 9] ETag ::= '</' QName S? '>'
8641 */
8642
8643void
8644xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008645 xmlParseEndTag1(ctxt, 0);
8646}
Daniel Veillard81273902003-09-30 00:43:48 +00008647#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008648
8649/************************************************************************
8650 * *
8651 * SAX 2 specific operations *
8652 * *
8653 ************************************************************************/
8654
Daniel Veillard0fb18932003-09-07 09:14:37 +00008655/*
8656 * xmlGetNamespace:
8657 * @ctxt: an XML parser context
8658 * @prefix: the prefix to lookup
8659 *
8660 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008661 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008662 *
8663 * Returns the namespace name or NULL if not bound
8664 */
8665static const xmlChar *
8666xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8667 int i;
8668
Daniel Veillarde57ec792003-09-10 10:50:59 +00008669 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008670 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008671 if (ctxt->nsTab[i] == prefix) {
8672 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8673 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008675 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008676 return(NULL);
8677}
8678
8679/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008680 * xmlParseQName:
8681 * @ctxt: an XML parser context
8682 * @prefix: pointer to store the prefix part
8683 *
8684 * parse an XML Namespace QName
8685 *
8686 * [6] QName ::= (Prefix ':')? LocalPart
8687 * [7] Prefix ::= NCName
8688 * [8] LocalPart ::= NCName
8689 *
8690 * Returns the Name parsed or NULL
8691 */
8692
8693static const xmlChar *
8694xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8695 const xmlChar *l, *p;
8696
8697 GROW;
8698
8699 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008700 if (l == NULL) {
8701 if (CUR == ':') {
8702 l = xmlParseName(ctxt);
8703 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008704 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008705 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008706 *prefix = NULL;
8707 return(l);
8708 }
8709 }
8710 return(NULL);
8711 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008712 if (CUR == ':') {
8713 NEXT;
8714 p = l;
8715 l = xmlParseNCName(ctxt);
8716 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008717 xmlChar *tmp;
8718
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008719 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8720 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008721 l = xmlParseNmtoken(ctxt);
8722 if (l == NULL)
8723 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8724 else {
8725 tmp = xmlBuildQName(l, p, NULL, 0);
8726 xmlFree((char *)l);
8727 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008728 p = xmlDictLookup(ctxt->dict, tmp, -1);
8729 if (tmp != NULL) xmlFree(tmp);
8730 *prefix = NULL;
8731 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008732 }
8733 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008734 xmlChar *tmp;
8735
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008736 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8737 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008738 NEXT;
8739 tmp = (xmlChar *) xmlParseName(ctxt);
8740 if (tmp != NULL) {
8741 tmp = xmlBuildQName(tmp, l, NULL, 0);
8742 l = xmlDictLookup(ctxt->dict, tmp, -1);
8743 if (tmp != NULL) xmlFree(tmp);
8744 *prefix = p;
8745 return(l);
8746 }
8747 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8748 l = xmlDictLookup(ctxt->dict, tmp, -1);
8749 if (tmp != NULL) xmlFree(tmp);
8750 *prefix = p;
8751 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008752 }
8753 *prefix = p;
8754 } else
8755 *prefix = NULL;
8756 return(l);
8757}
8758
8759/**
8760 * xmlParseQNameAndCompare:
8761 * @ctxt: an XML parser context
8762 * @name: the localname
8763 * @prefix: the prefix, if any.
8764 *
8765 * parse an XML name and compares for match
8766 * (specialized for endtag parsing)
8767 *
8768 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8769 * and the name for mismatch
8770 */
8771
8772static const xmlChar *
8773xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8774 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008775 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008776 const xmlChar *in;
8777 const xmlChar *ret;
8778 const xmlChar *prefix2;
8779
8780 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8781
8782 GROW;
8783 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008784
Daniel Veillard0fb18932003-09-07 09:14:37 +00008785 cmp = prefix;
8786 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008787 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008788 ++cmp;
8789 }
8790 if ((*cmp == 0) && (*in == ':')) {
8791 in++;
8792 cmp = name;
8793 while (*in != 0 && *in == *cmp) {
8794 ++in;
8795 ++cmp;
8796 }
William M. Brack76e95df2003-10-18 16:20:14 +00008797 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008798 /* success */
8799 ctxt->input->cur = in;
8800 return((const xmlChar*) 1);
8801 }
8802 }
8803 /*
8804 * all strings coms from the dictionary, equality can be done directly
8805 */
8806 ret = xmlParseQName (ctxt, &prefix2);
8807 if ((ret == name) && (prefix == prefix2))
8808 return((const xmlChar*) 1);
8809 return ret;
8810}
8811
8812/**
8813 * xmlParseAttValueInternal:
8814 * @ctxt: an XML parser context
8815 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008816 * @alloc: whether the attribute was reallocated as a new string
8817 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008818 *
8819 * parse a value for an attribute.
8820 * NOTE: if no normalization is needed, the routine will return pointers
8821 * directly from the data buffer.
8822 *
8823 * 3.3.3 Attribute-Value Normalization:
8824 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008825 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008826 * - a character reference is processed by appending the referenced
8827 * character to the attribute value
8828 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008829 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008830 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8831 * appending #x20 to the normalized value, except that only a single
8832 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008833 * parsed entity or the literal entity value of an internal parsed entity
8834 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008835 * If the declared value is not CDATA, then the XML processor must further
8836 * process the normalized attribute value by discarding any leading and
8837 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008838 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008839 * All attributes for which no declaration has been read should be treated
8840 * by a non-validating parser as if declared CDATA.
8841 *
8842 * Returns the AttValue parsed or NULL. The value has to be freed by the
8843 * caller if it was copied, this can be detected by val[*len] == 0.
8844 */
8845
8846static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008847xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8848 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008849{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008850 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008851 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008852 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008853 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008854
8855 GROW;
8856 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008857 line = ctxt->input->line;
8858 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008860 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008861 return (NULL);
8862 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008863 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008864
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008865 /*
8866 * try to handle in this routine the most common case where no
8867 * allocation of a new string is required and where content is
8868 * pure ASCII.
8869 */
8870 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008871 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008872 end = ctxt->input->end;
8873 start = in;
8874 if (in >= end) {
8875 const xmlChar *oldbase = ctxt->input->base;
8876 GROW;
8877 if (oldbase != ctxt->input->base) {
8878 long delta = ctxt->input->base - oldbase;
8879 start = start + delta;
8880 in = in + delta;
8881 }
8882 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008883 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008884 if (normalize) {
8885 /*
8886 * Skip any leading spaces
8887 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008888 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008889 ((*in == 0x20) || (*in == 0x9) ||
8890 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008891 if (*in == 0xA) {
8892 line++; col = 1;
8893 } else {
8894 col++;
8895 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008896 in++;
8897 start = in;
8898 if (in >= end) {
8899 const xmlChar *oldbase = ctxt->input->base;
8900 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008901 if (ctxt->instate == XML_PARSER_EOF)
8902 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008903 if (oldbase != ctxt->input->base) {
8904 long delta = ctxt->input->base - oldbase;
8905 start = start + delta;
8906 in = in + delta;
8907 }
8908 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008909 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8910 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8911 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008912 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008913 return(NULL);
8914 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008915 }
8916 }
8917 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8918 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008919 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008920 if ((*in++ == 0x20) && (*in == 0x20)) break;
8921 if (in >= end) {
8922 const xmlChar *oldbase = ctxt->input->base;
8923 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008924 if (ctxt->instate == XML_PARSER_EOF)
8925 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008926 if (oldbase != ctxt->input->base) {
8927 long delta = ctxt->input->base - oldbase;
8928 start = start + delta;
8929 in = in + delta;
8930 }
8931 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008932 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8933 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8934 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008935 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008936 return(NULL);
8937 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008938 }
8939 }
8940 last = in;
8941 /*
8942 * skip the trailing blanks
8943 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008944 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008945 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008946 ((*in == 0x20) || (*in == 0x9) ||
8947 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008948 if (*in == 0xA) {
8949 line++, col = 1;
8950 } else {
8951 col++;
8952 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008953 in++;
8954 if (in >= end) {
8955 const xmlChar *oldbase = ctxt->input->base;
8956 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008957 if (ctxt->instate == XML_PARSER_EOF)
8958 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008959 if (oldbase != ctxt->input->base) {
8960 long delta = ctxt->input->base - oldbase;
8961 start = start + delta;
8962 in = in + delta;
8963 last = last + delta;
8964 }
8965 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008966 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8967 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8968 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008969 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008970 return(NULL);
8971 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008972 }
8973 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008974 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8975 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8976 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008977 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008978 return(NULL);
8979 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008980 if (*in != limit) goto need_complex;
8981 } else {
8982 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8983 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8984 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008985 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008986 if (in >= end) {
8987 const xmlChar *oldbase = ctxt->input->base;
8988 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008989 if (ctxt->instate == XML_PARSER_EOF)
8990 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008991 if (oldbase != ctxt->input->base) {
8992 long delta = ctxt->input->base - oldbase;
8993 start = start + delta;
8994 in = in + delta;
8995 }
8996 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008997 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8998 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8999 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009000 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009001 return(NULL);
9002 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009003 }
9004 }
9005 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009006 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9007 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9008 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009009 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009010 return(NULL);
9011 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009012 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009013 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009014 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009015 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009016 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009017 *len = last - start;
9018 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009019 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009020 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009021 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009022 }
9023 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009024 ctxt->input->line = line;
9025 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009026 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009027 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009028need_complex:
9029 if (alloc) *alloc = 1;
9030 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009031}
9032
9033/**
9034 * xmlParseAttribute2:
9035 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009036 * @pref: the element prefix
9037 * @elem: the element name
9038 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009039 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009040 * @len: an int * to save the length of the attribute
9041 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009042 *
9043 * parse an attribute in the new SAX2 framework.
9044 *
9045 * Returns the attribute name, and the value in *value, .
9046 */
9047
9048static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009049xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009050 const xmlChar * pref, const xmlChar * elem,
9051 const xmlChar ** prefix, xmlChar ** value,
9052 int *len, int *alloc)
9053{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009054 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009055 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009056 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009057
9058 *value = NULL;
9059 GROW;
9060 name = xmlParseQName(ctxt, prefix);
9061 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009062 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9063 "error parsing attribute name\n");
9064 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009065 }
9066
9067 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009068 * get the type if needed
9069 */
9070 if (ctxt->attsSpecial != NULL) {
9071 int type;
9072
9073 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009074 pref, elem, *prefix, name);
9075 if (type != 0)
9076 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009077 }
9078
9079 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009080 * read the value
9081 */
9082 SKIP_BLANKS;
9083 if (RAW == '=') {
9084 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009085 SKIP_BLANKS;
9086 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9087 if (normalize) {
9088 /*
9089 * Sometimes a second normalisation pass for spaces is needed
9090 * but that only happens if charrefs or entities refernces
9091 * have been used in the attribute value, i.e. the attribute
9092 * value have been extracted in an allocated string already.
9093 */
9094 if (*alloc) {
9095 const xmlChar *val2;
9096
9097 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009098 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009099 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009100 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009101 }
9102 }
9103 }
9104 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009105 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009106 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +02009107 "Specification mandates value for attribute %s\n",
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009108 name);
9109 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009110 }
9111
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009112 if (*prefix == ctxt->str_xml) {
9113 /*
9114 * Check that xml:lang conforms to the specification
9115 * No more registered as an error, just generate a warning now
9116 * since this was deprecated in XML second edition
9117 */
9118 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9119 internal_val = xmlStrndup(val, *len);
9120 if (!xmlCheckLanguageID(internal_val)) {
9121 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9122 "Malformed value for xml:lang : %s\n",
9123 internal_val, NULL);
9124 }
9125 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009126
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009127 /*
9128 * Check that xml:space conforms to the specification
9129 */
9130 if (xmlStrEqual(name, BAD_CAST "space")) {
9131 internal_val = xmlStrndup(val, *len);
9132 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9133 *(ctxt->space) = 0;
9134 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9135 *(ctxt->space) = 1;
9136 else {
9137 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9138 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9139 internal_val, NULL);
9140 }
9141 }
9142 if (internal_val) {
9143 xmlFree(internal_val);
9144 }
9145 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009146
9147 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009148 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009149}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009150/**
9151 * xmlParseStartTag2:
9152 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009153 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009154 * parse a start of tag either for rule element or
9155 * EmptyElement. In both case we don't parse the tag closing chars.
9156 * This routine is called when running SAX2 parsing
9157 *
9158 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9159 *
9160 * [ WFC: Unique Att Spec ]
9161 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009162 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009163 *
9164 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9165 *
9166 * [ WFC: Unique Att Spec ]
9167 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009168 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009169 *
9170 * With namespace:
9171 *
9172 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9173 *
9174 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9175 *
9176 * Returns the element name parsed
9177 */
9178
9179static const xmlChar *
9180xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009181 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009182 const xmlChar *localname;
9183 const xmlChar *prefix;
9184 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009185 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009186 const xmlChar *nsname;
9187 xmlChar *attvalue;
9188 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009189 int maxatts = ctxt->maxatts;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009190 int nratts, nbatts, nbdef, inputid;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009191 int i, j, nbNs, attval;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009192 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009193 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009194
9195 if (RAW != '<') return(NULL);
9196 NEXT1;
9197
9198 /*
9199 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9200 * point since the attribute values may be stored as pointers to
9201 * the buffer and calling SHRINK would destroy them !
9202 * The Shrinking is only possible once the full set of attribute
9203 * callbacks have been done.
9204 */
9205 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009206 cur = ctxt->input->cur - ctxt->input->base;
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009207 inputid = ctxt->input->id;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009208 nbatts = 0;
9209 nratts = 0;
9210 nbdef = 0;
9211 nbNs = 0;
9212 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009213 /* Forget any namespaces added during an earlier parse of this element. */
9214 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009215
9216 localname = xmlParseQName(ctxt, &prefix);
9217 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009218 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9219 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220 return(NULL);
9221 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009222 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009223
9224 /*
9225 * Now parse the attributes, it ends up with the ending
9226 *
9227 * (S Attribute)* S?
9228 */
9229 SKIP_BLANKS;
9230 GROW;
9231
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009232 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009233 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009234 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009235 const xmlChar *q = CUR_PTR;
9236 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009237 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009238
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009239 attname = xmlParseAttribute2(ctxt, prefix, localname,
9240 &aprefix, &attvalue, &len, &alloc);
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009241 if ((attname == NULL) || (attvalue == NULL))
9242 goto next_attr;
9243 if (len < 0) len = xmlStrlen(attvalue);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009244
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009245 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9246 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9247 xmlURIPtr uri;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009248
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009249 if (URL == NULL) {
9250 xmlErrMemory(ctxt, "dictionary allocation failure");
9251 if ((attvalue != NULL) && (alloc != 0))
9252 xmlFree(attvalue);
9253 return(NULL);
9254 }
9255 if (*URL != 0) {
9256 uri = xmlParseURI((const char *) URL);
9257 if (uri == NULL) {
9258 xmlNsErr(ctxt, XML_WAR_NS_URI,
9259 "xmlns: '%s' is not a valid URI\n",
9260 URL, NULL, NULL);
9261 } else {
9262 if (uri->scheme == NULL) {
9263 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9264 "xmlns: URI %s is not absolute\n",
9265 URL, NULL, NULL);
9266 }
9267 xmlFreeURI(uri);
9268 }
Daniel Veillard37334572008-07-31 08:20:02 +00009269 if (URL == ctxt->str_xml_ns) {
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009270 if (attname != ctxt->str_xml) {
9271 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9272 "xml namespace URI cannot be the default namespace\n",
9273 NULL, NULL, NULL);
9274 }
9275 goto next_attr;
9276 }
9277 if ((len == 29) &&
9278 (xmlStrEqual(URL,
9279 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9280 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9281 "reuse of the xmlns namespace name is forbidden\n",
9282 NULL, NULL, NULL);
9283 goto next_attr;
9284 }
9285 }
9286 /*
9287 * check that it's not a defined namespace
9288 */
9289 for (j = 1;j <= nbNs;j++)
9290 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9291 break;
9292 if (j <= nbNs)
9293 xmlErrAttributeDup(ctxt, NULL, attname);
9294 else
9295 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009296
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009297 } else if (aprefix == ctxt->str_xmlns) {
9298 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9299 xmlURIPtr uri;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009300
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009301 if (attname == ctxt->str_xml) {
9302 if (URL != ctxt->str_xml_ns) {
9303 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9304 "xml namespace prefix mapped to wrong URI\n",
9305 NULL, NULL, NULL);
9306 }
9307 /*
9308 * Do not keep a namespace definition node
9309 */
9310 goto next_attr;
9311 }
9312 if (URL == ctxt->str_xml_ns) {
9313 if (attname != ctxt->str_xml) {
9314 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9315 "xml namespace URI mapped to wrong prefix\n",
9316 NULL, NULL, NULL);
9317 }
9318 goto next_attr;
9319 }
9320 if (attname == ctxt->str_xmlns) {
9321 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9322 "redefinition of the xmlns prefix is forbidden\n",
9323 NULL, NULL, NULL);
9324 goto next_attr;
9325 }
9326 if ((len == 29) &&
9327 (xmlStrEqual(URL,
9328 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9329 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9330 "reuse of the xmlns namespace name is forbidden\n",
9331 NULL, NULL, NULL);
9332 goto next_attr;
9333 }
9334 if ((URL == NULL) || (URL[0] == 0)) {
9335 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9336 "xmlns:%s: Empty XML namespace is not allowed\n",
9337 attname, NULL, NULL);
9338 goto next_attr;
9339 } else {
9340 uri = xmlParseURI((const char *) URL);
9341 if (uri == NULL) {
9342 xmlNsErr(ctxt, XML_WAR_NS_URI,
9343 "xmlns:%s: '%s' is not a valid URI\n",
9344 attname, URL, NULL);
9345 } else {
9346 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9347 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9348 "xmlns:%s: URI %s is not absolute\n",
9349 attname, URL, NULL);
9350 }
9351 xmlFreeURI(uri);
9352 }
9353 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009354
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009355 /*
9356 * check that it's not a defined namespace
9357 */
9358 for (j = 1;j <= nbNs;j++)
9359 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9360 break;
9361 if (j <= nbNs)
9362 xmlErrAttributeDup(ctxt, aprefix, attname);
9363 else
9364 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9365
9366 } else {
9367 /*
9368 * Add the pair to atts
9369 */
9370 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9371 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9372 goto next_attr;
9373 }
9374 maxatts = ctxt->maxatts;
9375 atts = ctxt->atts;
9376 }
9377 ctxt->attallocs[nratts++] = alloc;
9378 atts[nbatts++] = attname;
9379 atts[nbatts++] = aprefix;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009380 /*
9381 * The namespace URI field is used temporarily to point at the
9382 * base of the current input buffer for non-alloced attributes.
9383 * When the input buffer is reallocated, all the pointers become
9384 * invalid, but they can be reconstructed later.
9385 */
9386 if (alloc)
9387 atts[nbatts++] = NULL;
9388 else
9389 atts[nbatts++] = ctxt->input->base;
Nick Wellnhofer07b74282017-06-01 00:19:14 +02009390 atts[nbatts++] = attvalue;
9391 attvalue += len;
9392 atts[nbatts++] = attvalue;
9393 /*
9394 * tag if some deallocation is needed
9395 */
9396 if (alloc != 0) attval = 1;
9397 attvalue = NULL; /* moved into atts */
9398 }
9399
9400next_attr:
9401 if ((attvalue != NULL) && (alloc != 0)) {
9402 xmlFree(attvalue);
9403 attvalue = NULL;
9404 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009405
9406 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009407 if (ctxt->instate == XML_PARSER_EOF)
9408 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009409 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9410 break;
Nick Wellnhoferd9e43c72017-06-19 18:01:23 +02009411 if (SKIP_BLANKS == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009412 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9413 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009414 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009415 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009416 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9417 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009418 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009419 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009420 break;
9421 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009422 GROW;
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009423 }
9424
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009425 if (ctxt->input->id != inputid) {
9426 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9427 "Unexpected change of input\n");
9428 localname = NULL;
9429 goto done;
9430 }
9431
Nick Wellnhofer855c19e2017-06-01 01:04:08 +02009432 /* Reconstruct attribute value pointers. */
9433 for (i = 0, j = 0; j < nratts; i += 5, j++) {
9434 if (atts[i+2] != NULL) {
9435 /*
9436 * Arithmetic on dangling pointers is technically undefined
9437 * behavior, but well...
9438 */
9439 ptrdiff_t offset = ctxt->input->base - atts[i+2];
9440 atts[i+2] = NULL; /* Reset repurposed namespace URI */
9441 atts[i+3] += offset; /* value */
9442 atts[i+4] += offset; /* valuend */
9443 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009444 }
9445
Daniel Veillard0fb18932003-09-07 09:14:37 +00009446 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009447 * The attributes defaulting
9448 */
9449 if (ctxt->attsDefault != NULL) {
9450 xmlDefAttrsPtr defaults;
9451
9452 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9453 if (defaults != NULL) {
9454 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009455 attname = defaults->values[5 * i];
9456 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009457
9458 /*
9459 * special work for namespaces defaulted defs
9460 */
9461 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9462 /*
9463 * check that it's not a defined namespace
9464 */
9465 for (j = 1;j <= nbNs;j++)
9466 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9467 break;
9468 if (j <= nbNs) continue;
9469
9470 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009471 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009472 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009473 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009474 nbNs++;
9475 }
9476 } else if (aprefix == ctxt->str_xmlns) {
9477 /*
9478 * check that it's not a defined namespace
9479 */
9480 for (j = 1;j <= nbNs;j++)
9481 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9482 break;
9483 if (j <= nbNs) continue;
9484
9485 nsname = xmlGetNamespace(ctxt, attname);
9486 if (nsname != defaults->values[2]) {
9487 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009488 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009489 nbNs++;
9490 }
9491 } else {
9492 /*
9493 * check that it's not a defined attribute
9494 */
9495 for (j = 0;j < nbatts;j+=5) {
9496 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9497 break;
9498 }
9499 if (j < nbatts) continue;
9500
9501 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9502 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009503 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009504 }
9505 maxatts = ctxt->maxatts;
9506 atts = ctxt->atts;
9507 }
9508 atts[nbatts++] = attname;
9509 atts[nbatts++] = aprefix;
9510 if (aprefix == NULL)
9511 atts[nbatts++] = NULL;
9512 else
9513 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009514 atts[nbatts++] = defaults->values[5 * i + 2];
9515 atts[nbatts++] = defaults->values[5 * i + 3];
9516 if ((ctxt->standalone == 1) &&
9517 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009518 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009519 "standalone: attribute %s on %s defaulted from external subset\n",
9520 attname, localname);
9521 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009522 nbdef++;
9523 }
9524 }
9525 }
9526 }
9527
Daniel Veillarde70c8772003-11-25 07:21:18 +00009528 /*
9529 * The attributes checkings
9530 */
9531 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009532 /*
9533 * The default namespace does not apply to attribute names.
9534 */
9535 if (atts[i + 1] != NULL) {
9536 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9537 if (nsname == NULL) {
9538 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9539 "Namespace prefix %s for %s on %s is not defined\n",
9540 atts[i + 1], atts[i], localname);
9541 }
9542 atts[i + 2] = nsname;
9543 } else
9544 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009545 /*
9546 * [ WFC: Unique Att Spec ]
9547 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009548 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009549 * As extended by the Namespace in XML REC.
9550 */
9551 for (j = 0; j < i;j += 5) {
9552 if (atts[i] == atts[j]) {
9553 if (atts[i+1] == atts[j+1]) {
9554 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9555 break;
9556 }
9557 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9558 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9559 "Namespaced Attribute %s in '%s' redefined\n",
9560 atts[i], nsname, NULL);
9561 break;
9562 }
9563 }
9564 }
9565 }
9566
Daniel Veillarde57ec792003-09-10 10:50:59 +00009567 nsname = xmlGetNamespace(ctxt, prefix);
9568 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009569 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9570 "Namespace prefix %s on %s is not defined\n",
9571 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009572 }
9573 *pref = prefix;
9574 *URI = nsname;
9575
9576 /*
9577 * SAX: Start of Element !
9578 */
9579 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9580 (!ctxt->disableSAX)) {
9581 if (nbNs > 0)
9582 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9583 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9584 nbatts / 5, nbdef, atts);
9585 else
9586 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9587 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9588 }
9589
Nick Wellnhofer5f440d82017-06-12 14:32:34 +02009590done:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009591 /*
9592 * Free up attribute allocated strings if needed
9593 */
9594 if (attval != 0) {
9595 for (i = 3,j = 0; j < nratts;i += 5,j++)
9596 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9597 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009598 }
9599
9600 return(localname);
9601}
9602
9603/**
9604 * xmlParseEndTag2:
9605 * @ctxt: an XML parser context
9606 * @line: line of the start tag
9607 * @nsNr: number of namespaces on the start tag
9608 *
9609 * parse an end of tag
9610 *
9611 * [42] ETag ::= '</' Name S? '>'
9612 *
9613 * With namespace
9614 *
9615 * [NS 9] ETag ::= '</' QName S? '>'
9616 */
9617
9618static void
9619xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009620 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009621 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009622 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009623
9624 GROW;
9625 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009626 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009627 return;
9628 }
9629 SKIP(2);
9630
David Kilzerdb07dd62016-02-12 09:58:29 -08009631 curLength = ctxt->input->end - ctxt->input->cur;
9632 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9633 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9634 if ((curLength >= (size_t)(tlen + 1)) &&
9635 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009636 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009637 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009638 goto done;
9639 }
9640 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009641 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009642 name = (xmlChar*)1;
9643 } else {
9644 if (prefix == NULL)
9645 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9646 else
9647 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9648 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009649
9650 /*
9651 * We should definitely be at the ending "S? '>'" part
9652 */
9653 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009654 if (ctxt->instate == XML_PARSER_EOF)
9655 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009656 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009657 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009658 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009659 } else
9660 NEXT1;
9661
9662 /*
9663 * [ WFC: Element Type Match ]
9664 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009665 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009666 *
9667 */
9668 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009669 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009670 if ((line == 0) && (ctxt->node != NULL))
9671 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009672 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009673 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009674 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009675 }
9676
9677 /*
9678 * SAX: End of Tag
9679 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009680done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009681 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9682 (!ctxt->disableSAX))
9683 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9684
Daniel Veillard0fb18932003-09-07 09:14:37 +00009685 spacePop(ctxt);
9686 if (nsNr != 0)
9687 nsPop(ctxt, nsNr);
9688 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009689}
9690
9691/**
Owen Taylor3473f882001-02-23 17:55:21 +00009692 * xmlParseCDSect:
9693 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009694 *
Owen Taylor3473f882001-02-23 17:55:21 +00009695 * Parse escaped pure raw content.
9696 *
9697 * [18] CDSect ::= CDStart CData CDEnd
9698 *
9699 * [19] CDStart ::= '<![CDATA['
9700 *
9701 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9702 *
9703 * [21] CDEnd ::= ']]>'
9704 */
9705void
9706xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9707 xmlChar *buf = NULL;
9708 int len = 0;
9709 int size = XML_PARSER_BUFFER_SIZE;
9710 int r, rl;
9711 int s, sl;
9712 int cur, l;
9713 int count = 0;
9714
Daniel Veillard8f597c32003-10-06 08:19:27 +00009715 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009716 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009717 SKIP(9);
9718 } else
9719 return;
9720
9721 ctxt->instate = XML_PARSER_CDATA_SECTION;
9722 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009723 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009724 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009725 ctxt->instate = XML_PARSER_CONTENT;
9726 return;
9727 }
9728 NEXTL(rl);
9729 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009730 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009731 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009732 ctxt->instate = XML_PARSER_CONTENT;
9733 return;
9734 }
9735 NEXTL(sl);
9736 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009737 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009738 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009739 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009740 return;
9741 }
William M. Brack871611b2003-10-18 04:53:14 +00009742 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009743 ((r != ']') || (s != ']') || (cur != '>'))) {
9744 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009745 xmlChar *tmp;
9746
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009747 if ((size > XML_MAX_TEXT_LENGTH) &&
9748 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9749 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9750 "CData section too big found", NULL);
9751 xmlFree (buf);
9752 return;
9753 }
9754 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009755 if (tmp == NULL) {
9756 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009757 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009758 return;
9759 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009760 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009761 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009762 }
9763 COPY_BUF(rl,buf,len,r);
9764 r = s;
9765 rl = sl;
9766 s = cur;
9767 sl = l;
9768 count++;
9769 if (count > 50) {
9770 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009771 if (ctxt->instate == XML_PARSER_EOF) {
9772 xmlFree(buf);
9773 return;
9774 }
Owen Taylor3473f882001-02-23 17:55:21 +00009775 count = 0;
9776 }
9777 NEXTL(l);
9778 cur = CUR_CHAR(l);
9779 }
9780 buf[len] = 0;
9781 ctxt->instate = XML_PARSER_CONTENT;
9782 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009783 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009784 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009785 xmlFree(buf);
9786 return;
9787 }
9788 NEXTL(l);
9789
9790 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009791 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009792 */
9793 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9794 if (ctxt->sax->cdataBlock != NULL)
9795 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009796 else if (ctxt->sax->characters != NULL)
9797 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009798 }
9799 xmlFree(buf);
9800}
9801
9802/**
9803 * xmlParseContent:
9804 * @ctxt: an XML parser context
9805 *
9806 * Parse a content:
9807 *
9808 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9809 */
9810
9811void
9812xmlParseContent(xmlParserCtxtPtr ctxt) {
9813 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009814 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009815 ((RAW != '<') || (NXT(1) != '/')) &&
9816 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009817 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009818 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009819 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009820
9821 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009822 * First case : a Processing Instruction.
9823 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009824 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009825 xmlParsePI(ctxt);
9826 }
9827
9828 /*
9829 * Second case : a CDSection
9830 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009831 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009832 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009833 xmlParseCDSect(ctxt);
9834 }
9835
9836 /*
9837 * Third case : a comment
9838 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009839 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009840 (NXT(2) == '-') && (NXT(3) == '-')) {
9841 xmlParseComment(ctxt);
9842 ctxt->instate = XML_PARSER_CONTENT;
9843 }
9844
9845 /*
9846 * Fourth case : a sub-element.
9847 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009848 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009849 xmlParseElement(ctxt);
9850 }
9851
9852 /*
9853 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009854 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009855 */
9856
Daniel Veillard21a0f912001-02-25 19:54:14 +00009857 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009858 xmlParseReference(ctxt);
9859 }
9860
9861 /*
9862 * Last case, text. Note that References are handled directly.
9863 */
9864 else {
9865 xmlParseCharData(ctxt, 0);
9866 }
9867
9868 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00009869 SHRINK;
9870
Daniel Veillardfdc91562002-07-01 21:52:03 +00009871 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009872 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9873 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +08009874 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009875 break;
9876 }
9877 }
9878}
9879
9880/**
9881 * xmlParseElement:
9882 * @ctxt: an XML parser context
9883 *
9884 * parse an XML element, this is highly recursive
9885 *
9886 * [39] element ::= EmptyElemTag | STag content ETag
9887 *
9888 * [ WFC: Element Type Match ]
9889 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009890 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009891 *
Owen Taylor3473f882001-02-23 17:55:21 +00009892 */
9893
9894void
9895xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009896 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009897 const xmlChar *prefix = NULL;
9898 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009899 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009900 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009901 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009902 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009903
Daniel Veillard8915c152008-08-26 13:05:34 +00009904 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9905 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9906 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9907 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9908 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08009909 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009910 return;
9911 }
9912
Owen Taylor3473f882001-02-23 17:55:21 +00009913 /* Capture start position */
9914 if (ctxt->record_info) {
9915 node_info.begin_pos = ctxt->input->consumed +
9916 (CUR_PTR - ctxt->input->base);
9917 node_info.begin_line = ctxt->input->line;
9918 }
9919
9920 if (ctxt->spaceNr == 0)
9921 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009922 else if (*ctxt->space == -2)
9923 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009924 else
9925 spacePush(ctxt, *ctxt->space);
9926
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009927 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009928#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009929 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009930#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009931 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009932#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009933 else
9934 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009935#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009936 if (ctxt->instate == XML_PARSER_EOF)
9937 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009938 if (name == NULL) {
9939 spacePop(ctxt);
9940 return;
9941 }
9942 namePush(ctxt, name);
9943 ret = ctxt->node;
9944
Daniel Veillard4432df22003-09-28 18:58:27 +00009945#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009946 /*
9947 * [ VC: Root Element Type ]
9948 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009949 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009950 */
9951 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9952 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9953 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009954#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009955
9956 /*
9957 * Check for an Empty Element.
9958 */
9959 if ((RAW == '/') && (NXT(1) == '>')) {
9960 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009961 if (ctxt->sax2) {
9962 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9963 (!ctxt->disableSAX))
9964 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009965#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009966 } else {
9967 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9968 (!ctxt->disableSAX))
9969 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009970#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009971 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009972 namePop(ctxt);
9973 spacePop(ctxt);
9974 if (nsNr != ctxt->nsNr)
9975 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009976 if ( ret != NULL && ctxt->record_info ) {
9977 node_info.end_pos = ctxt->input->consumed +
9978 (CUR_PTR - ctxt->input->base);
9979 node_info.end_line = ctxt->input->line;
9980 node_info.node = ret;
9981 xmlParserAddNodeInfo(ctxt, &node_info);
9982 }
9983 return;
9984 }
9985 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009986 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009987 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009988 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9989 "Couldn't find end of Start Tag %s line %d\n",
9990 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009991
9992 /*
9993 * end of parsing of this node.
9994 */
9995 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009996 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009997 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009998 if (nsNr != ctxt->nsNr)
9999 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010000
10001 /*
10002 * Capture end position and add node
10003 */
10004 if ( ret != NULL && ctxt->record_info ) {
10005 node_info.end_pos = ctxt->input->consumed +
10006 (CUR_PTR - ctxt->input->base);
10007 node_info.end_line = ctxt->input->line;
10008 node_info.node = ret;
10009 xmlParserAddNodeInfo(ctxt, &node_info);
10010 }
10011 return;
10012 }
10013
10014 /*
10015 * Parse the content of the element:
10016 */
10017 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010018 if (ctxt->instate == XML_PARSER_EOF)
10019 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010020 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010021 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010022 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010023 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010024
10025 /*
10026 * end of parsing of this node.
10027 */
10028 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010029 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010030 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010031 if (nsNr != ctxt->nsNr)
10032 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010033 return;
10034 }
10035
10036 /*
10037 * parse the end of tag: '</' should be here.
10038 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010039 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010040 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010041 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010042 }
10043#ifdef LIBXML_SAX1_ENABLED
10044 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010045 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010046#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010047
10048 /*
10049 * Capture end position and add node
10050 */
10051 if ( ret != NULL && ctxt->record_info ) {
10052 node_info.end_pos = ctxt->input->consumed +
10053 (CUR_PTR - ctxt->input->base);
10054 node_info.end_line = ctxt->input->line;
10055 node_info.node = ret;
10056 xmlParserAddNodeInfo(ctxt, &node_info);
10057 }
10058}
10059
10060/**
10061 * xmlParseVersionNum:
10062 * @ctxt: an XML parser context
10063 *
10064 * parse the XML version value.
10065 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010066 * [26] VersionNum ::= '1.' [0-9]+
10067 *
10068 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010069 *
10070 * Returns the string giving the XML version number, or NULL
10071 */
10072xmlChar *
10073xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10074 xmlChar *buf = NULL;
10075 int len = 0;
10076 int size = 10;
10077 xmlChar cur;
10078
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010079 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010080 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010081 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010082 return(NULL);
10083 }
10084 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010085 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010086 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010087 return(NULL);
10088 }
10089 buf[len++] = cur;
10090 NEXT;
10091 cur=CUR;
10092 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010093 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010094 return(NULL);
10095 }
10096 buf[len++] = cur;
10097 NEXT;
10098 cur=CUR;
10099 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010100 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010101 xmlChar *tmp;
10102
Owen Taylor3473f882001-02-23 17:55:21 +000010103 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010104 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10105 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010106 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010107 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010108 return(NULL);
10109 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010110 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010111 }
10112 buf[len++] = cur;
10113 NEXT;
10114 cur=CUR;
10115 }
10116 buf[len] = 0;
10117 return(buf);
10118}
10119
10120/**
10121 * xmlParseVersionInfo:
10122 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010123 *
Owen Taylor3473f882001-02-23 17:55:21 +000010124 * parse the XML version.
10125 *
10126 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010127 *
Owen Taylor3473f882001-02-23 17:55:21 +000010128 * [25] Eq ::= S? '=' S?
10129 *
10130 * Returns the version string, e.g. "1.0"
10131 */
10132
10133xmlChar *
10134xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10135 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010136
Daniel Veillarda07050d2003-10-19 14:46:32 +000010137 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010138 SKIP(7);
10139 SKIP_BLANKS;
10140 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010141 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010142 return(NULL);
10143 }
10144 NEXT;
10145 SKIP_BLANKS;
10146 if (RAW == '"') {
10147 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010148 version = xmlParseVersionNum(ctxt);
10149 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010150 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010151 } else
10152 NEXT;
10153 } else if (RAW == '\''){
10154 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010155 version = xmlParseVersionNum(ctxt);
10156 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010157 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010158 } else
10159 NEXT;
10160 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010161 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010162 }
10163 }
10164 return(version);
10165}
10166
10167/**
10168 * xmlParseEncName:
10169 * @ctxt: an XML parser context
10170 *
10171 * parse the XML encoding name
10172 *
10173 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10174 *
10175 * Returns the encoding name value or NULL
10176 */
10177xmlChar *
10178xmlParseEncName(xmlParserCtxtPtr ctxt) {
10179 xmlChar *buf = NULL;
10180 int len = 0;
10181 int size = 10;
10182 xmlChar cur;
10183
10184 cur = CUR;
10185 if (((cur >= 'a') && (cur <= 'z')) ||
10186 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010187 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010188 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010189 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010190 return(NULL);
10191 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010192
Owen Taylor3473f882001-02-23 17:55:21 +000010193 buf[len++] = cur;
10194 NEXT;
10195 cur = CUR;
10196 while (((cur >= 'a') && (cur <= 'z')) ||
10197 ((cur >= 'A') && (cur <= 'Z')) ||
10198 ((cur >= '0') && (cur <= '9')) ||
10199 (cur == '.') || (cur == '_') ||
10200 (cur == '-')) {
10201 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010202 xmlChar *tmp;
10203
Owen Taylor3473f882001-02-23 17:55:21 +000010204 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010205 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10206 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010207 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010208 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010209 return(NULL);
10210 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010211 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010212 }
10213 buf[len++] = cur;
10214 NEXT;
10215 cur = CUR;
10216 if (cur == 0) {
10217 SHRINK;
10218 GROW;
10219 cur = CUR;
10220 }
10221 }
10222 buf[len] = 0;
10223 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010224 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010225 }
10226 return(buf);
10227}
10228
10229/**
10230 * xmlParseEncodingDecl:
10231 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010232 *
Owen Taylor3473f882001-02-23 17:55:21 +000010233 * parse the XML encoding declaration
10234 *
10235 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10236 *
10237 * this setups the conversion filters.
10238 *
10239 * Returns the encoding value or NULL
10240 */
10241
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010242const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010243xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10244 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010245
10246 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010247 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010248 SKIP(8);
10249 SKIP_BLANKS;
10250 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010251 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010252 return(NULL);
10253 }
10254 NEXT;
10255 SKIP_BLANKS;
10256 if (RAW == '"') {
10257 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010258 encoding = xmlParseEncName(ctxt);
10259 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010260 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010261 xmlFree((xmlChar *) encoding);
10262 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010263 } else
10264 NEXT;
10265 } else if (RAW == '\''){
10266 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010267 encoding = xmlParseEncName(ctxt);
10268 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010269 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010270 xmlFree((xmlChar *) encoding);
10271 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010272 } else
10273 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010274 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010275 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010276 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010277
10278 /*
10279 * Non standard parsing, allowing the user to ignore encoding
10280 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010281 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10282 xmlFree((xmlChar *) encoding);
10283 return(NULL);
10284 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010285
Daniel Veillard6b621b82003-08-11 15:03:34 +000010286 /*
10287 * UTF-16 encoding stwich has already taken place at this stage,
10288 * more over the little-endian/big-endian selection is already done
10289 */
10290 if ((encoding != NULL) &&
10291 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10292 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010293 /*
10294 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010295 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010296 * document is apparently UTF-8 compatible, then raise an
10297 * encoding mismatch fatal error
10298 */
10299 if ((ctxt->encoding == NULL) &&
10300 (ctxt->input->buf != NULL) &&
10301 (ctxt->input->buf->encoder == NULL)) {
10302 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10303 "Document labelled UTF-16 but has UTF-8 content\n");
10304 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010305 if (ctxt->encoding != NULL)
10306 xmlFree((xmlChar *) ctxt->encoding);
10307 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010308 }
10309 /*
10310 * UTF-8 encoding is handled natively
10311 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010312 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010313 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10314 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010315 if (ctxt->encoding != NULL)
10316 xmlFree((xmlChar *) ctxt->encoding);
10317 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010318 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010319 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010320 xmlCharEncodingHandlerPtr handler;
10321
10322 if (ctxt->input->encoding != NULL)
10323 xmlFree((xmlChar *) ctxt->input->encoding);
10324 ctxt->input->encoding = encoding;
10325
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010326 handler = xmlFindCharEncodingHandler((const char *) encoding);
10327 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010328 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10329 /* failed to convert */
10330 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10331 return(NULL);
10332 }
Owen Taylor3473f882001-02-23 17:55:21 +000010333 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010334 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010335 "Unsupported encoding %s\n", encoding);
10336 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010337 }
10338 }
10339 }
10340 return(encoding);
10341}
10342
10343/**
10344 * xmlParseSDDecl:
10345 * @ctxt: an XML parser context
10346 *
10347 * parse the XML standalone declaration
10348 *
10349 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010350 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010351 *
10352 * [ VC: Standalone Document Declaration ]
10353 * TODO The standalone document declaration must have the value "no"
10354 * if any external markup declarations contain declarations of:
10355 * - attributes with default values, if elements to which these
10356 * attributes apply appear in the document without specifications
10357 * of values for these attributes, or
10358 * - entities (other than amp, lt, gt, apos, quot), if references
10359 * to those entities appear in the document, or
10360 * - attributes with values subject to normalization, where the
10361 * attribute appears in the document with a value which will change
10362 * as a result of normalization, or
10363 * - element types with element content, if white space occurs directly
10364 * within any instance of those types.
10365 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010366 * Returns:
10367 * 1 if standalone="yes"
10368 * 0 if standalone="no"
10369 * -2 if standalone attribute is missing or invalid
10370 * (A standalone value of -2 means that the XML declaration was found,
10371 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010372 */
10373
10374int
10375xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010376 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010377
10378 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010379 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010380 SKIP(10);
10381 SKIP_BLANKS;
10382 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010383 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010384 return(standalone);
10385 }
10386 NEXT;
10387 SKIP_BLANKS;
10388 if (RAW == '\''){
10389 NEXT;
10390 if ((RAW == 'n') && (NXT(1) == 'o')) {
10391 standalone = 0;
10392 SKIP(2);
10393 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10394 (NXT(2) == 's')) {
10395 standalone = 1;
10396 SKIP(3);
10397 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010398 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010399 }
10400 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010401 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010402 } else
10403 NEXT;
10404 } else if (RAW == '"'){
10405 NEXT;
10406 if ((RAW == 'n') && (NXT(1) == 'o')) {
10407 standalone = 0;
10408 SKIP(2);
10409 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10410 (NXT(2) == 's')) {
10411 standalone = 1;
10412 SKIP(3);
10413 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010414 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010415 }
10416 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010417 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010418 } else
10419 NEXT;
10420 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010421 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010422 }
10423 }
10424 return(standalone);
10425}
10426
10427/**
10428 * xmlParseXMLDecl:
10429 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010430 *
Owen Taylor3473f882001-02-23 17:55:21 +000010431 * parse an XML declaration header
10432 *
10433 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10434 */
10435
10436void
10437xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10438 xmlChar *version;
10439
10440 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010441 * This value for standalone indicates that the document has an
10442 * XML declaration but it does not have a standalone attribute.
10443 * It will be overwritten later if a standalone attribute is found.
10444 */
10445 ctxt->input->standalone = -2;
10446
10447 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010448 * We know that '<?xml' is here.
10449 */
10450 SKIP(5);
10451
William M. Brack76e95df2003-10-18 16:20:14 +000010452 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10454 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010455 }
10456 SKIP_BLANKS;
10457
10458 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010459 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010460 */
10461 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010462 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010463 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010464 } else {
10465 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10466 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010467 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010468 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010469 if (ctxt->options & XML_PARSE_OLD10) {
10470 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10471 "Unsupported version '%s'\n",
10472 version);
10473 } else {
10474 if ((version[0] == '1') && ((version[1] == '.'))) {
10475 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10476 "Unsupported version '%s'\n",
10477 version, NULL);
10478 } else {
10479 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10480 "Unsupported version '%s'\n",
10481 version);
10482 }
10483 }
Daniel Veillard19840942001-11-29 16:11:38 +000010484 }
10485 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010486 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010487 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010488 }
Owen Taylor3473f882001-02-23 17:55:21 +000010489
10490 /*
10491 * We may have the encoding declaration
10492 */
William M. Brack76e95df2003-10-18 16:20:14 +000010493 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010494 if ((RAW == '?') && (NXT(1) == '>')) {
10495 SKIP(2);
10496 return;
10497 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010498 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010499 }
10500 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010501 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10502 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010503 /*
10504 * The XML REC instructs us to stop parsing right here
10505 */
10506 return;
10507 }
10508
10509 /*
10510 * We may have the standalone status.
10511 */
William M. Brack76e95df2003-10-18 16:20:14 +000010512 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010513 if ((RAW == '?') && (NXT(1) == '>')) {
10514 SKIP(2);
10515 return;
10516 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010517 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010518 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010519
10520 /*
10521 * We can grow the input buffer freely at that point
10522 */
10523 GROW;
10524
Owen Taylor3473f882001-02-23 17:55:21 +000010525 SKIP_BLANKS;
10526 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10527
10528 SKIP_BLANKS;
10529 if ((RAW == '?') && (NXT(1) == '>')) {
10530 SKIP(2);
10531 } else if (RAW == '>') {
10532 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010534 NEXT;
10535 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010536 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010537 MOVETO_ENDTAG(CUR_PTR);
10538 NEXT;
10539 }
10540}
10541
10542/**
10543 * xmlParseMisc:
10544 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010545 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010546 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010547 *
10548 * [27] Misc ::= Comment | PI | S
10549 */
10550
10551void
10552xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010553 while ((ctxt->instate != XML_PARSER_EOF) &&
10554 (((RAW == '<') && (NXT(1) == '?')) ||
10555 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10556 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010557 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010558 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010559 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010560 NEXT;
10561 } else
10562 xmlParseComment(ctxt);
10563 }
10564}
10565
10566/**
10567 * xmlParseDocument:
10568 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010569 *
Owen Taylor3473f882001-02-23 17:55:21 +000010570 * parse an XML document (and build a tree if using the standard SAX
10571 * interface).
10572 *
10573 * [1] document ::= prolog element Misc*
10574 *
10575 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10576 *
10577 * Returns 0, -1 in case of error. the parser context is augmented
10578 * as a result of the parsing.
10579 */
10580
10581int
10582xmlParseDocument(xmlParserCtxtPtr ctxt) {
10583 xmlChar start[4];
10584 xmlCharEncoding enc;
10585
10586 xmlInitParser();
10587
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010588 if ((ctxt == NULL) || (ctxt->input == NULL))
10589 return(-1);
10590
Owen Taylor3473f882001-02-23 17:55:21 +000010591 GROW;
10592
10593 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010594 * SAX: detecting the level.
10595 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010596 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010597
10598 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010599 * SAX: beginning of the document processing.
10600 */
10601 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10602 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010603 if (ctxt->instate == XML_PARSER_EOF)
10604 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010605
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010606 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010607 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010608 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010609 * Get the 4 first bytes and decode the charset
10610 * if enc != XML_CHAR_ENCODING_NONE
10611 * plug some encoding conversion routines.
10612 */
10613 start[0] = RAW;
10614 start[1] = NXT(1);
10615 start[2] = NXT(2);
10616 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010617 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010618 if (enc != XML_CHAR_ENCODING_NONE) {
10619 xmlSwitchEncoding(ctxt, enc);
10620 }
Owen Taylor3473f882001-02-23 17:55:21 +000010621 }
10622
10623
10624 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010625 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010626 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010627 }
10628
10629 /*
10630 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010631 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010632 * than just the first line, unless the amount of data is really
10633 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010634 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010635 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10636 GROW;
10637 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010638 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010639
10640 /*
10641 * Note that we will switch encoding on the fly.
10642 */
10643 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010644 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10645 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010646 /*
10647 * The XML REC instructs us to stop parsing right here
10648 */
10649 return(-1);
10650 }
10651 ctxt->standalone = ctxt->input->standalone;
10652 SKIP_BLANKS;
10653 } else {
10654 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10655 }
10656 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10657 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010658 if (ctxt->instate == XML_PARSER_EOF)
10659 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010660 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10661 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10662 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10663 }
Owen Taylor3473f882001-02-23 17:55:21 +000010664
10665 /*
10666 * The Misc part of the Prolog
10667 */
10668 GROW;
10669 xmlParseMisc(ctxt);
10670
10671 /*
10672 * Then possibly doc type declaration(s) and more Misc
10673 * (doctypedecl Misc*)?
10674 */
10675 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010676 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010677
10678 ctxt->inSubset = 1;
10679 xmlParseDocTypeDecl(ctxt);
10680 if (RAW == '[') {
10681 ctxt->instate = XML_PARSER_DTD;
10682 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010683 if (ctxt->instate == XML_PARSER_EOF)
10684 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010685 }
10686
10687 /*
10688 * Create and update the external subset.
10689 */
10690 ctxt->inSubset = 2;
10691 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10692 (!ctxt->disableSAX))
10693 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10694 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010695 if (ctxt->instate == XML_PARSER_EOF)
10696 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010697 ctxt->inSubset = 0;
10698
Daniel Veillardac4118d2008-01-11 05:27:32 +000010699 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010700
10701 ctxt->instate = XML_PARSER_PROLOG;
10702 xmlParseMisc(ctxt);
10703 }
10704
10705 /*
10706 * Time to start parsing the tree itself
10707 */
10708 GROW;
10709 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010710 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10711 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010712 } else {
10713 ctxt->instate = XML_PARSER_CONTENT;
10714 xmlParseElement(ctxt);
10715 ctxt->instate = XML_PARSER_EPILOG;
10716
10717
10718 /*
10719 * The Misc part at the end
10720 */
10721 xmlParseMisc(ctxt);
10722
Daniel Veillard561b7f82002-03-20 21:55:57 +000010723 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010724 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010725 }
10726 ctxt->instate = XML_PARSER_EOF;
10727 }
10728
10729 /*
10730 * SAX: end of the document processing.
10731 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010732 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010733 ctxt->sax->endDocument(ctxt->userData);
10734
Daniel Veillard5997aca2002-03-18 18:36:20 +000010735 /*
10736 * Remove locally kept entity definitions if the tree was not built
10737 */
10738 if ((ctxt->myDoc != NULL) &&
10739 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10740 xmlFreeDoc(ctxt->myDoc);
10741 ctxt->myDoc = NULL;
10742 }
10743
Daniel Veillardae0765b2008-07-31 19:54:59 +000010744 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10745 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10746 if (ctxt->valid)
10747 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10748 if (ctxt->nsWellFormed)
10749 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10750 if (ctxt->options & XML_PARSE_OLD10)
10751 ctxt->myDoc->properties |= XML_DOC_OLD10;
10752 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010753 if (! ctxt->wellFormed) {
10754 ctxt->valid = 0;
10755 return(-1);
10756 }
Owen Taylor3473f882001-02-23 17:55:21 +000010757 return(0);
10758}
10759
10760/**
10761 * xmlParseExtParsedEnt:
10762 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010763 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010764 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010765 * An external general parsed entity is well-formed if it matches the
10766 * production labeled extParsedEnt.
10767 *
10768 * [78] extParsedEnt ::= TextDecl? content
10769 *
10770 * Returns 0, -1 in case of error. the parser context is augmented
10771 * as a result of the parsing.
10772 */
10773
10774int
10775xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10776 xmlChar start[4];
10777 xmlCharEncoding enc;
10778
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010779 if ((ctxt == NULL) || (ctxt->input == NULL))
10780 return(-1);
10781
Owen Taylor3473f882001-02-23 17:55:21 +000010782 xmlDefaultSAXHandlerInit();
10783
Daniel Veillard309f81d2003-09-23 09:02:53 +000010784 xmlDetectSAX2(ctxt);
10785
Owen Taylor3473f882001-02-23 17:55:21 +000010786 GROW;
10787
10788 /*
10789 * SAX: beginning of the document processing.
10790 */
10791 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10792 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10793
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010794 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010795 * Get the 4 first bytes and decode the charset
10796 * if enc != XML_CHAR_ENCODING_NONE
10797 * plug some encoding conversion routines.
10798 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010799 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10800 start[0] = RAW;
10801 start[1] = NXT(1);
10802 start[2] = NXT(2);
10803 start[3] = NXT(3);
10804 enc = xmlDetectCharEncoding(start, 4);
10805 if (enc != XML_CHAR_ENCODING_NONE) {
10806 xmlSwitchEncoding(ctxt, enc);
10807 }
Owen Taylor3473f882001-02-23 17:55:21 +000010808 }
10809
10810
10811 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010812 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010813 }
10814
10815 /*
10816 * Check for the XMLDecl in the Prolog.
10817 */
10818 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010819 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010820
10821 /*
10822 * Note that we will switch encoding on the fly.
10823 */
10824 xmlParseXMLDecl(ctxt);
10825 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10826 /*
10827 * The XML REC instructs us to stop parsing right here
10828 */
10829 return(-1);
10830 }
10831 SKIP_BLANKS;
10832 } else {
10833 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10834 }
10835 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10836 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010837 if (ctxt->instate == XML_PARSER_EOF)
10838 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010839
10840 /*
10841 * Doing validity checking on chunk doesn't make sense
10842 */
10843 ctxt->instate = XML_PARSER_CONTENT;
10844 ctxt->validate = 0;
10845 ctxt->loadsubset = 0;
10846 ctxt->depth = 0;
10847
10848 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010849 if (ctxt->instate == XML_PARSER_EOF)
10850 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010851
Owen Taylor3473f882001-02-23 17:55:21 +000010852 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010853 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010854 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010855 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010856 }
10857
10858 /*
10859 * SAX: end of the document processing.
10860 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010861 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010862 ctxt->sax->endDocument(ctxt->userData);
10863
10864 if (! ctxt->wellFormed) return(-1);
10865 return(0);
10866}
10867
Daniel Veillard73b013f2003-09-30 12:36:01 +000010868#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010869/************************************************************************
10870 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010871 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010872 * *
10873 ************************************************************************/
10874
10875/**
10876 * xmlParseLookupSequence:
10877 * @ctxt: an XML parser context
10878 * @first: the first char to lookup
10879 * @next: the next char to lookup or zero
10880 * @third: the next char to lookup or zero
10881 *
10882 * Try to find if a sequence (first, next, third) or just (first next) or
10883 * (first) is available in the input stream.
10884 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10885 * to avoid rescanning sequences of bytes, it DOES change the state of the
10886 * parser, do not use liberally.
10887 *
10888 * Returns the index to the current parsing point if the full sequence
10889 * is available, -1 otherwise.
10890 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010891static int
Owen Taylor3473f882001-02-23 17:55:21 +000010892xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10893 xmlChar next, xmlChar third) {
10894 int base, len;
10895 xmlParserInputPtr in;
10896 const xmlChar *buf;
10897
10898 in = ctxt->input;
10899 if (in == NULL) return(-1);
10900 base = in->cur - in->base;
10901 if (base < 0) return(-1);
10902 if (ctxt->checkIndex > base)
10903 base = ctxt->checkIndex;
10904 if (in->buf == NULL) {
10905 buf = in->base;
10906 len = in->length;
10907 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010908 buf = xmlBufContent(in->buf->buffer);
10909 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010910 }
10911 /* take into account the sequence length */
10912 if (third) len -= 2;
10913 else if (next) len --;
10914 for (;base < len;base++) {
10915 if (buf[base] == first) {
10916 if (third != 0) {
10917 if ((buf[base + 1] != next) ||
10918 (buf[base + 2] != third)) continue;
10919 } else if (next != 0) {
10920 if (buf[base + 1] != next) continue;
10921 }
10922 ctxt->checkIndex = 0;
10923#ifdef DEBUG_PUSH
10924 if (next == 0)
10925 xmlGenericError(xmlGenericErrorContext,
10926 "PP: lookup '%c' found at %d\n",
10927 first, base);
10928 else if (third == 0)
10929 xmlGenericError(xmlGenericErrorContext,
10930 "PP: lookup '%c%c' found at %d\n",
10931 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010932 else
Owen Taylor3473f882001-02-23 17:55:21 +000010933 xmlGenericError(xmlGenericErrorContext,
10934 "PP: lookup '%c%c%c' found at %d\n",
10935 first, next, third, base);
10936#endif
10937 return(base - (in->cur - in->base));
10938 }
10939 }
10940 ctxt->checkIndex = base;
10941#ifdef DEBUG_PUSH
10942 if (next == 0)
10943 xmlGenericError(xmlGenericErrorContext,
10944 "PP: lookup '%c' failed\n", first);
10945 else if (third == 0)
10946 xmlGenericError(xmlGenericErrorContext,
10947 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010948 else
Owen Taylor3473f882001-02-23 17:55:21 +000010949 xmlGenericError(xmlGenericErrorContext,
10950 "PP: lookup '%c%c%c' failed\n", first, next, third);
10951#endif
10952 return(-1);
10953}
10954
10955/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010956 * xmlParseGetLasts:
10957 * @ctxt: an XML parser context
10958 * @lastlt: pointer to store the last '<' from the input
10959 * @lastgt: pointer to store the last '>' from the input
10960 *
10961 * Lookup the last < and > in the current chunk
10962 */
10963static void
10964xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10965 const xmlChar **lastgt) {
10966 const xmlChar *tmp;
10967
10968 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10969 xmlGenericError(xmlGenericErrorContext,
10970 "Internal error: xmlParseGetLasts\n");
10971 return;
10972 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010973 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010974 tmp = ctxt->input->end;
10975 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010976 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010977 if (tmp < ctxt->input->base) {
10978 *lastlt = NULL;
10979 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010980 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010981 *lastlt = tmp;
10982 tmp++;
10983 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10984 if (*tmp == '\'') {
10985 tmp++;
10986 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10987 if (tmp < ctxt->input->end) tmp++;
10988 } else if (*tmp == '"') {
10989 tmp++;
10990 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10991 if (tmp < ctxt->input->end) tmp++;
10992 } else
10993 tmp++;
10994 }
10995 if (tmp < ctxt->input->end)
10996 *lastgt = tmp;
10997 else {
10998 tmp = *lastlt;
10999 tmp--;
11000 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11001 if (tmp >= ctxt->input->base)
11002 *lastgt = tmp;
11003 else
11004 *lastgt = NULL;
11005 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011006 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011007 } else {
11008 *lastlt = NULL;
11009 *lastgt = NULL;
11010 }
11011}
11012/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011013 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011014 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011015 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011016 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011017 *
11018 * Check that the block of characters is okay as SCdata content [20]
11019 *
11020 * Returns the number of bytes to pass if okay, a negative index where an
Nick Wellnhofer8bbe4502017-06-17 16:15:09 +020011021 * UTF-8 error occurred otherwise
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011022 */
11023static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011024xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011025 int ix;
11026 unsigned char c;
11027 int codepoint;
11028
11029 if ((utf == NULL) || (len <= 0))
11030 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011031
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011032 for (ix = 0; ix < len;) { /* string is 0-terminated */
11033 c = utf[ix];
11034 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11035 if (c >= 0x20)
11036 ix++;
11037 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11038 ix++;
11039 else
11040 return(-ix);
11041 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011042 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011043 if ((utf[ix+1] & 0xc0 ) != 0x80)
11044 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011045 codepoint = (utf[ix] & 0x1f) << 6;
11046 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011047 if (!xmlIsCharQ(codepoint))
11048 return(-ix);
11049 ix += 2;
11050 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011051 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011052 if (((utf[ix+1] & 0xc0) != 0x80) ||
11053 ((utf[ix+2] & 0xc0) != 0x80))
11054 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011055 codepoint = (utf[ix] & 0xf) << 12;
11056 codepoint |= (utf[ix+1] & 0x3f) << 6;
11057 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011058 if (!xmlIsCharQ(codepoint))
11059 return(-ix);
11060 ix += 3;
11061 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011062 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011063 if (((utf[ix+1] & 0xc0) != 0x80) ||
11064 ((utf[ix+2] & 0xc0) != 0x80) ||
11065 ((utf[ix+3] & 0xc0) != 0x80))
11066 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011067 codepoint = (utf[ix] & 0x7) << 18;
11068 codepoint |= (utf[ix+1] & 0x3f) << 12;
11069 codepoint |= (utf[ix+2] & 0x3f) << 6;
11070 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011071 if (!xmlIsCharQ(codepoint))
11072 return(-ix);
11073 ix += 4;
11074 } else /* unknown encoding */
11075 return(-ix);
11076 }
11077 return(ix);
11078}
11079
11080/**
Owen Taylor3473f882001-02-23 17:55:21 +000011081 * xmlParseTryOrFinish:
11082 * @ctxt: an XML parser context
11083 * @terminate: last chunk indicator
11084 *
11085 * Try to progress on parsing
11086 *
11087 * Returns zero if no parsing was possible
11088 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011089static int
Owen Taylor3473f882001-02-23 17:55:21 +000011090xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11091 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011092 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011093 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011094 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011095
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011096 if (ctxt->input == NULL)
11097 return(0);
11098
Owen Taylor3473f882001-02-23 17:55:21 +000011099#ifdef DEBUG_PUSH
11100 switch (ctxt->instate) {
11101 case XML_PARSER_EOF:
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: try EOF\n"); break;
11104 case XML_PARSER_START:
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: try START\n"); break;
11107 case XML_PARSER_MISC:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: try MISC\n");break;
11110 case XML_PARSER_COMMENT:
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: try COMMENT\n");break;
11113 case XML_PARSER_PROLOG:
11114 xmlGenericError(xmlGenericErrorContext,
11115 "PP: try PROLOG\n");break;
11116 case XML_PARSER_START_TAG:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: try START_TAG\n");break;
11119 case XML_PARSER_CONTENT:
11120 xmlGenericError(xmlGenericErrorContext,
11121 "PP: try CONTENT\n");break;
11122 case XML_PARSER_CDATA_SECTION:
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: try CDATA_SECTION\n");break;
11125 case XML_PARSER_END_TAG:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: try END_TAG\n");break;
11128 case XML_PARSER_ENTITY_DECL:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try ENTITY_DECL\n");break;
11131 case XML_PARSER_ENTITY_VALUE:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try ENTITY_VALUE\n");break;
11134 case XML_PARSER_ATTRIBUTE_VALUE:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try ATTRIBUTE_VALUE\n");break;
11137 case XML_PARSER_DTD:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try DTD\n");break;
11140 case XML_PARSER_EPILOG:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try EPILOG\n");break;
11143 case XML_PARSER_PI:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try PI\n");break;
11146 case XML_PARSER_IGNORE:
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: try IGNORE\n");break;
11149 }
11150#endif
11151
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011152 if ((ctxt->input != NULL) &&
11153 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011154 xmlSHRINK(ctxt);
11155 ctxt->checkIndex = 0;
11156 }
11157 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011158
Daniel Veillarde50ba812013-04-11 15:54:51 +080011159 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011160 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011161 return(0);
11162
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011163 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011164 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011165 avail = ctxt->input->length -
11166 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011167 else {
11168 /*
11169 * If we are operating on converted input, try to flush
11170 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011171 * buffer. But do not do this in document start where
11172 * encoding="..." may not have been read and we work on a
11173 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011174 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011175 if ((ctxt->instate != XML_PARSER_START) &&
11176 (ctxt->input->buf->raw != NULL) &&
11177 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011178 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11179 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011180 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011181
11182 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011183 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11184 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011185 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011186 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011187 (ctxt->input->cur - ctxt->input->base);
11188 }
Owen Taylor3473f882001-02-23 17:55:21 +000011189 if (avail < 1)
11190 goto done;
11191 switch (ctxt->instate) {
11192 case XML_PARSER_EOF:
11193 /*
11194 * Document parsing is done !
11195 */
11196 goto done;
11197 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011198 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11199 xmlChar start[4];
11200 xmlCharEncoding enc;
11201
11202 /*
11203 * Very first chars read from the document flow.
11204 */
11205 if (avail < 4)
11206 goto done;
11207
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011208 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011209 * Get the 4 first bytes and decode the charset
11210 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011211 * plug some encoding conversion routines,
11212 * else xmlSwitchEncoding will set to (default)
11213 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011214 */
11215 start[0] = RAW;
11216 start[1] = NXT(1);
11217 start[2] = NXT(2);
11218 start[3] = NXT(3);
11219 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011220 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011221 break;
11222 }
Owen Taylor3473f882001-02-23 17:55:21 +000011223
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011224 if (avail < 2)
11225 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011226 cur = ctxt->input->cur[0];
11227 next = ctxt->input->cur[1];
11228 if (cur == 0) {
11229 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11230 ctxt->sax->setDocumentLocator(ctxt->userData,
11231 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011232 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011233 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011234#ifdef DEBUG_PUSH
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: entering EOF\n");
11237#endif
11238 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11239 ctxt->sax->endDocument(ctxt->userData);
11240 goto done;
11241 }
11242 if ((cur == '<') && (next == '?')) {
11243 /* PI or XML decl */
11244 if (avail < 5) return(ret);
11245 if ((!terminate) &&
11246 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11247 return(ret);
11248 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11249 ctxt->sax->setDocumentLocator(ctxt->userData,
11250 &xmlDefaultSAXLocator);
11251 if ((ctxt->input->cur[2] == 'x') &&
11252 (ctxt->input->cur[3] == 'm') &&
11253 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011254 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011255 ret += 5;
11256#ifdef DEBUG_PUSH
11257 xmlGenericError(xmlGenericErrorContext,
11258 "PP: Parsing XML Decl\n");
11259#endif
11260 xmlParseXMLDecl(ctxt);
11261 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11262 /*
11263 * The XML REC instructs us to stop parsing right
11264 * here
11265 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011266 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011267 return(0);
11268 }
11269 ctxt->standalone = ctxt->input->standalone;
11270 if ((ctxt->encoding == NULL) &&
11271 (ctxt->input->encoding != NULL))
11272 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11273 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11274 (!ctxt->disableSAX))
11275 ctxt->sax->startDocument(ctxt->userData);
11276 ctxt->instate = XML_PARSER_MISC;
11277#ifdef DEBUG_PUSH
11278 xmlGenericError(xmlGenericErrorContext,
11279 "PP: entering MISC\n");
11280#endif
11281 } else {
11282 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11283 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11284 (!ctxt->disableSAX))
11285 ctxt->sax->startDocument(ctxt->userData);
11286 ctxt->instate = XML_PARSER_MISC;
11287#ifdef DEBUG_PUSH
11288 xmlGenericError(xmlGenericErrorContext,
11289 "PP: entering MISC\n");
11290#endif
11291 }
11292 } else {
11293 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11294 ctxt->sax->setDocumentLocator(ctxt->userData,
11295 &xmlDefaultSAXLocator);
11296 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011297 if (ctxt->version == NULL) {
11298 xmlErrMemory(ctxt, NULL);
11299 break;
11300 }
Owen Taylor3473f882001-02-23 17:55:21 +000011301 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11302 (!ctxt->disableSAX))
11303 ctxt->sax->startDocument(ctxt->userData);
11304 ctxt->instate = XML_PARSER_MISC;
11305#ifdef DEBUG_PUSH
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: entering MISC\n");
11308#endif
11309 }
11310 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011311 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011312 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011313 const xmlChar *prefix = NULL;
11314 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011315 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011316
11317 if ((avail < 2) && (ctxt->inputNr == 1))
11318 goto done;
11319 cur = ctxt->input->cur[0];
11320 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011321 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011322 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011323 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11324 ctxt->sax->endDocument(ctxt->userData);
11325 goto done;
11326 }
11327 if (!terminate) {
11328 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011329 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011330 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011331 goto done;
11332 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11333 goto done;
11334 }
11335 }
11336 if (ctxt->spaceNr == 0)
11337 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011338 else if (*ctxt->space == -2)
11339 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011340 else
11341 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011342#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011343 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011344#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011345 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011346#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011347 else
11348 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011349#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011350 if (ctxt->instate == XML_PARSER_EOF)
11351 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011352 if (name == NULL) {
11353 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011354 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011355 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11356 ctxt->sax->endDocument(ctxt->userData);
11357 goto done;
11358 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011359#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011360 /*
11361 * [ VC: Root Element Type ]
11362 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011363 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011364 */
11365 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11366 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11367 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011368#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011369
11370 /*
11371 * Check for an Empty Element.
11372 */
11373 if ((RAW == '/') && (NXT(1) == '>')) {
11374 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011375
11376 if (ctxt->sax2) {
11377 if ((ctxt->sax != NULL) &&
11378 (ctxt->sax->endElementNs != NULL) &&
11379 (!ctxt->disableSAX))
11380 ctxt->sax->endElementNs(ctxt->userData, name,
11381 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011382 if (ctxt->nsNr - nsNr > 0)
11383 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011384#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011385 } else {
11386 if ((ctxt->sax != NULL) &&
11387 (ctxt->sax->endElement != NULL) &&
11388 (!ctxt->disableSAX))
11389 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011390#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011391 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011392 if (ctxt->instate == XML_PARSER_EOF)
11393 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011394 spacePop(ctxt);
11395 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011396 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011397 } else {
11398 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011399 }
Daniel Veillard65686452012-07-19 18:25:01 +080011400 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011401 break;
11402 }
11403 if (RAW == '>') {
11404 NEXT;
11405 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011406 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011407 "Couldn't find end of Start Tag %s\n",
11408 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011409 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011410 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011411 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011412 if (ctxt->sax2)
11413 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011414#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011415 else
11416 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011417#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011418
Daniel Veillarda880b122003-04-21 21:36:41 +000011419 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011420 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011421 break;
11422 }
11423 case XML_PARSER_CONTENT: {
11424 const xmlChar *test;
11425 unsigned int cons;
11426 if ((avail < 2) && (ctxt->inputNr == 1))
11427 goto done;
11428 cur = ctxt->input->cur[0];
11429 next = ctxt->input->cur[1];
11430
11431 test = CUR_PTR;
11432 cons = ctxt->input->consumed;
11433 if ((cur == '<') && (next == '/')) {
11434 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011435 break;
11436 } else if ((cur == '<') && (next == '?')) {
11437 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011438 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11439 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011440 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011441 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011442 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011443 ctxt->instate = XML_PARSER_CONTENT;
11444 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011445 } else if ((cur == '<') && (next != '!')) {
11446 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011447 break;
11448 } else if ((cur == '<') && (next == '!') &&
11449 (ctxt->input->cur[2] == '-') &&
11450 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011451 int term;
11452
11453 if (avail < 4)
11454 goto done;
11455 ctxt->input->cur += 4;
11456 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11457 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011458 if ((!terminate) && (term < 0)) {
11459 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011460 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011461 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011462 xmlParseComment(ctxt);
11463 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011464 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011465 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11466 (ctxt->input->cur[2] == '[') &&
11467 (ctxt->input->cur[3] == 'C') &&
11468 (ctxt->input->cur[4] == 'D') &&
11469 (ctxt->input->cur[5] == 'A') &&
11470 (ctxt->input->cur[6] == 'T') &&
11471 (ctxt->input->cur[7] == 'A') &&
11472 (ctxt->input->cur[8] == '[')) {
11473 SKIP(9);
11474 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011475 break;
11476 } else if ((cur == '<') && (next == '!') &&
11477 (avail < 9)) {
11478 goto done;
11479 } else if (cur == '&') {
11480 if ((!terminate) &&
11481 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11482 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011483 xmlParseReference(ctxt);
11484 } else {
11485 /* TODO Avoid the extra copy, handle directly !!! */
11486 /*
11487 * Goal of the following test is:
11488 * - minimize calls to the SAX 'character' callback
11489 * when they are mergeable
11490 * - handle a problem for isBlank when we only parse
11491 * a sequence of blank chars and the next one is
11492 * not available to check against '<' presence.
11493 * - tries to homogenize the differences in SAX
11494 * callbacks between the push and pull versions
11495 * of the parser.
11496 */
11497 if ((ctxt->inputNr == 1) &&
11498 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11499 if (!terminate) {
11500 if (ctxt->progressive) {
11501 if ((lastlt == NULL) ||
11502 (ctxt->input->cur > lastlt))
11503 goto done;
11504 } else if (xmlParseLookupSequence(ctxt,
11505 '<', 0, 0) < 0) {
11506 goto done;
11507 }
11508 }
11509 }
11510 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011511 xmlParseCharData(ctxt, 0);
11512 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011513 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011514 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11515 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011516 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011517 break;
11518 }
11519 break;
11520 }
11521 case XML_PARSER_END_TAG:
11522 if (avail < 2)
11523 goto done;
11524 if (!terminate) {
11525 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011526 /* > can be found unescaped in attribute values */
11527 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011528 goto done;
11529 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11530 goto done;
11531 }
11532 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011533 if (ctxt->sax2) {
11534 xmlParseEndTag2(ctxt,
11535 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11536 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011537 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011538 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011539 }
11540#ifdef LIBXML_SAX1_ENABLED
11541 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011542 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011543#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011544 if (ctxt->instate == XML_PARSER_EOF) {
11545 /* Nothing */
11546 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011547 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 } else {
11549 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011550 }
11551 break;
11552 case XML_PARSER_CDATA_SECTION: {
11553 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011554 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011555 * cdataBlock merge back contiguous callbacks.
11556 */
11557 int base;
11558
11559 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11560 if (base < 0) {
11561 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011562 int tmp;
11563
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011564 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011565 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011566 if (tmp < 0) {
11567 tmp = -tmp;
11568 ctxt->input->cur += tmp;
11569 goto encoding_error;
11570 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11572 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011573 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011574 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011575 else if (ctxt->sax->characters != NULL)
11576 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011577 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011578 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011579 if (ctxt->instate == XML_PARSER_EOF)
11580 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011581 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011582 ctxt->checkIndex = 0;
11583 }
11584 goto done;
11585 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011586 int tmp;
11587
David Kilzer4f8606c2016-01-05 13:38:09 -080011588 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011589 if ((tmp < 0) || (tmp != base)) {
11590 tmp = -tmp;
11591 ctxt->input->cur += tmp;
11592 goto encoding_error;
11593 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011594 if ((ctxt->sax != NULL) && (base == 0) &&
11595 (ctxt->sax->cdataBlock != NULL) &&
11596 (!ctxt->disableSAX)) {
11597 /*
11598 * Special case to provide identical behaviour
11599 * between pull and push parsers on empty CDATA
11600 * sections
11601 */
11602 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11603 (!strncmp((const char *)&ctxt->input->cur[-9],
11604 "<![CDATA[", 9)))
11605 ctxt->sax->cdataBlock(ctxt->userData,
11606 BAD_CAST "", 0);
11607 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011608 (!ctxt->disableSAX)) {
11609 if (ctxt->sax->cdataBlock != NULL)
11610 ctxt->sax->cdataBlock(ctxt->userData,
11611 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011612 else if (ctxt->sax->characters != NULL)
11613 ctxt->sax->characters(ctxt->userData,
11614 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011615 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011616 if (ctxt->instate == XML_PARSER_EOF)
11617 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011618 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011619 ctxt->checkIndex = 0;
11620 ctxt->instate = XML_PARSER_CONTENT;
11621#ifdef DEBUG_PUSH
11622 xmlGenericError(xmlGenericErrorContext,
11623 "PP: entering CONTENT\n");
11624#endif
11625 }
11626 break;
11627 }
Owen Taylor3473f882001-02-23 17:55:21 +000011628 case XML_PARSER_MISC:
11629 SKIP_BLANKS;
11630 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011631 avail = ctxt->input->length -
11632 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011633 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011634 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011635 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011636 if (avail < 2)
11637 goto done;
11638 cur = ctxt->input->cur[0];
11639 next = ctxt->input->cur[1];
11640 if ((cur == '<') && (next == '?')) {
11641 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011642 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11643 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011644 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011645 }
Owen Taylor3473f882001-02-23 17:55:21 +000011646#ifdef DEBUG_PUSH
11647 xmlGenericError(xmlGenericErrorContext,
11648 "PP: Parsing PI\n");
11649#endif
11650 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011651 if (ctxt->instate == XML_PARSER_EOF)
11652 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011653 ctxt->instate = XML_PARSER_MISC;
11654 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011655 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011656 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011657 (ctxt->input->cur[2] == '-') &&
11658 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011659 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011660 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11661 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011662 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011663 }
Owen Taylor3473f882001-02-23 17:55:21 +000011664#ifdef DEBUG_PUSH
11665 xmlGenericError(xmlGenericErrorContext,
11666 "PP: Parsing Comment\n");
11667#endif
11668 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011669 if (ctxt->instate == XML_PARSER_EOF)
11670 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011671 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011672 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011673 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011674 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011675 (ctxt->input->cur[2] == 'D') &&
11676 (ctxt->input->cur[3] == 'O') &&
11677 (ctxt->input->cur[4] == 'C') &&
11678 (ctxt->input->cur[5] == 'T') &&
11679 (ctxt->input->cur[6] == 'Y') &&
11680 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011681 (ctxt->input->cur[8] == 'E')) {
11682 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011683 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11684 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011685 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011686 }
Owen Taylor3473f882001-02-23 17:55:21 +000011687#ifdef DEBUG_PUSH
11688 xmlGenericError(xmlGenericErrorContext,
11689 "PP: Parsing internal subset\n");
11690#endif
11691 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011692 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011693 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011694 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011695 if (ctxt->instate == XML_PARSER_EOF)
11696 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011697 if (RAW == '[') {
11698 ctxt->instate = XML_PARSER_DTD;
11699#ifdef DEBUG_PUSH
11700 xmlGenericError(xmlGenericErrorContext,
11701 "PP: entering DTD\n");
11702#endif
11703 } else {
11704 /*
11705 * Create and update the external subset.
11706 */
11707 ctxt->inSubset = 2;
11708 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11709 (ctxt->sax->externalSubset != NULL))
11710 ctxt->sax->externalSubset(ctxt->userData,
11711 ctxt->intSubName, ctxt->extSubSystem,
11712 ctxt->extSubURI);
11713 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011714 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011715 ctxt->instate = XML_PARSER_PROLOG;
11716#ifdef DEBUG_PUSH
11717 xmlGenericError(xmlGenericErrorContext,
11718 "PP: entering PROLOG\n");
11719#endif
11720 }
11721 } else if ((cur == '<') && (next == '!') &&
11722 (avail < 9)) {
11723 goto done;
11724 } else {
11725 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011726 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011727 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011728#ifdef DEBUG_PUSH
11729 xmlGenericError(xmlGenericErrorContext,
11730 "PP: entering START_TAG\n");
11731#endif
11732 }
11733 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011734 case XML_PARSER_PROLOG:
11735 SKIP_BLANKS;
11736 if (ctxt->input->buf == NULL)
11737 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11738 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011739 avail = xmlBufUse(ctxt->input->buf->buffer) -
11740 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011741 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011742 goto done;
11743 cur = ctxt->input->cur[0];
11744 next = ctxt->input->cur[1];
11745 if ((cur == '<') && (next == '?')) {
11746 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011747 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11748 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011749 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011750 }
Owen Taylor3473f882001-02-23 17:55:21 +000011751#ifdef DEBUG_PUSH
11752 xmlGenericError(xmlGenericErrorContext,
11753 "PP: Parsing PI\n");
11754#endif
11755 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011756 if (ctxt->instate == XML_PARSER_EOF)
11757 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011758 ctxt->instate = XML_PARSER_PROLOG;
11759 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011760 } else if ((cur == '<') && (next == '!') &&
11761 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11762 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011763 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11764 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011765 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011766 }
Owen Taylor3473f882001-02-23 17:55:21 +000011767#ifdef DEBUG_PUSH
11768 xmlGenericError(xmlGenericErrorContext,
11769 "PP: Parsing Comment\n");
11770#endif
11771 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011772 if (ctxt->instate == XML_PARSER_EOF)
11773 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011774 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011775 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011776 } else if ((cur == '<') && (next == '!') &&
11777 (avail < 4)) {
11778 goto done;
11779 } else {
11780 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011781 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011782 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011783 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011784#ifdef DEBUG_PUSH
11785 xmlGenericError(xmlGenericErrorContext,
11786 "PP: entering START_TAG\n");
11787#endif
11788 }
11789 break;
11790 case XML_PARSER_EPILOG:
11791 SKIP_BLANKS;
11792 if (ctxt->input->buf == NULL)
11793 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11794 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011795 avail = xmlBufUse(ctxt->input->buf->buffer) -
11796 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011797 if (avail < 2)
11798 goto done;
11799 cur = ctxt->input->cur[0];
11800 next = ctxt->input->cur[1];
11801 if ((cur == '<') && (next == '?')) {
11802 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011803 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11804 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011805 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011806 }
Owen Taylor3473f882001-02-23 17:55:21 +000011807#ifdef DEBUG_PUSH
11808 xmlGenericError(xmlGenericErrorContext,
11809 "PP: Parsing PI\n");
11810#endif
11811 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011812 if (ctxt->instate == XML_PARSER_EOF)
11813 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011814 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011815 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011816 } else if ((cur == '<') && (next == '!') &&
11817 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11818 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011819 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11820 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011821 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011822 }
Owen Taylor3473f882001-02-23 17:55:21 +000011823#ifdef DEBUG_PUSH
11824 xmlGenericError(xmlGenericErrorContext,
11825 "PP: Parsing Comment\n");
11826#endif
11827 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011828 if (ctxt->instate == XML_PARSER_EOF)
11829 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011830 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011831 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011832 } else if ((cur == '<') && (next == '!') &&
11833 (avail < 4)) {
11834 goto done;
11835 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011836 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011837 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011838#ifdef DEBUG_PUSH
11839 xmlGenericError(xmlGenericErrorContext,
11840 "PP: entering EOF\n");
11841#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011842 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011843 ctxt->sax->endDocument(ctxt->userData);
11844 goto done;
11845 }
11846 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011847 case XML_PARSER_DTD: {
11848 /*
11849 * Sorry but progressive parsing of the internal subset
11850 * is not expected to be supported. We first check that
11851 * the full content of the internal subset is available and
11852 * the parsing is launched only at that point.
11853 * Internal subset ends up with "']' S? '>'" in an unescaped
11854 * section and not in a ']]>' sequence which are conditional
11855 * sections (whoever argued to keep that crap in XML deserve
11856 * a place in hell !).
11857 */
11858 int base, i;
11859 xmlChar *buf;
11860 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011861 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011862
11863 base = ctxt->input->cur - ctxt->input->base;
11864 if (base < 0) return(0);
11865 if (ctxt->checkIndex > base)
11866 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011867 buf = xmlBufContent(ctxt->input->buf->buffer);
11868 use = xmlBufUse(ctxt->input->buf->buffer);
11869 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011870 if (quote != 0) {
11871 if (buf[base] == quote)
11872 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011873 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011874 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011875 if ((quote == 0) && (buf[base] == '<')) {
11876 int found = 0;
11877 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011878 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011879 (buf[base + 1] == '!') &&
11880 (buf[base + 2] == '-') &&
11881 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011882 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011883 if ((buf[base] == '-') &&
11884 (buf[base + 1] == '-') &&
11885 (buf[base + 2] == '>')) {
11886 found = 1;
11887 base += 2;
11888 break;
11889 }
11890 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011891 if (!found) {
11892#if 0
11893 fprintf(stderr, "unfinished comment\n");
11894#endif
11895 break; /* for */
11896 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011897 continue;
11898 }
11899 }
Owen Taylor3473f882001-02-23 17:55:21 +000011900 if (buf[base] == '"') {
11901 quote = '"';
11902 continue;
11903 }
11904 if (buf[base] == '\'') {
11905 quote = '\'';
11906 continue;
11907 }
11908 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011909#if 0
11910 fprintf(stderr, "%c%c%c%c: ", buf[base],
11911 buf[base + 1], buf[base + 2], buf[base + 3]);
11912#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011913 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011914 break;
11915 if (buf[base + 1] == ']') {
11916 /* conditional crap, skip both ']' ! */
11917 base++;
11918 continue;
11919 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011920 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011921 if (buf[base + i] == '>') {
11922#if 0
11923 fprintf(stderr, "found\n");
11924#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011925 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011926 }
11927 if (!IS_BLANK_CH(buf[base + i])) {
11928#if 0
11929 fprintf(stderr, "not found\n");
11930#endif
11931 goto not_end_of_int_subset;
11932 }
Owen Taylor3473f882001-02-23 17:55:21 +000011933 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011934#if 0
11935 fprintf(stderr, "end of stream\n");
11936#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011937 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011938
Owen Taylor3473f882001-02-23 17:55:21 +000011939 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011940not_end_of_int_subset:
11941 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011942 }
11943 /*
11944 * We didn't find the end of the Internal subset
11945 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011946 if (quote == 0)
11947 ctxt->checkIndex = base;
11948 else
11949 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011950#ifdef DEBUG_PUSH
11951 if (next == 0)
11952 xmlGenericError(xmlGenericErrorContext,
11953 "PP: lookup of int subset end filed\n");
11954#endif
11955 goto done;
11956
11957found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011958 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011959 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011960 if (ctxt->instate == XML_PARSER_EOF)
11961 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011962 ctxt->inSubset = 2;
11963 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11964 (ctxt->sax->externalSubset != NULL))
11965 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11966 ctxt->extSubSystem, ctxt->extSubURI);
11967 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011968 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011969 if (ctxt->instate == XML_PARSER_EOF)
11970 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011971 ctxt->instate = XML_PARSER_PROLOG;
11972 ctxt->checkIndex = 0;
11973#ifdef DEBUG_PUSH
11974 xmlGenericError(xmlGenericErrorContext,
11975 "PP: entering PROLOG\n");
11976#endif
11977 break;
11978 }
11979 case XML_PARSER_COMMENT:
11980 xmlGenericError(xmlGenericErrorContext,
11981 "PP: internal error, state == COMMENT\n");
11982 ctxt->instate = XML_PARSER_CONTENT;
11983#ifdef DEBUG_PUSH
11984 xmlGenericError(xmlGenericErrorContext,
11985 "PP: entering CONTENT\n");
11986#endif
11987 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011988 case XML_PARSER_IGNORE:
11989 xmlGenericError(xmlGenericErrorContext,
11990 "PP: internal error, state == IGNORE");
11991 ctxt->instate = XML_PARSER_DTD;
11992#ifdef DEBUG_PUSH
11993 xmlGenericError(xmlGenericErrorContext,
11994 "PP: entering DTD\n");
11995#endif
11996 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011997 case XML_PARSER_PI:
11998 xmlGenericError(xmlGenericErrorContext,
11999 "PP: internal error, state == PI\n");
12000 ctxt->instate = XML_PARSER_CONTENT;
12001#ifdef DEBUG_PUSH
12002 xmlGenericError(xmlGenericErrorContext,
12003 "PP: entering CONTENT\n");
12004#endif
12005 break;
12006 case XML_PARSER_ENTITY_DECL:
12007 xmlGenericError(xmlGenericErrorContext,
12008 "PP: internal error, state == ENTITY_DECL\n");
12009 ctxt->instate = XML_PARSER_DTD;
12010#ifdef DEBUG_PUSH
12011 xmlGenericError(xmlGenericErrorContext,
12012 "PP: entering DTD\n");
12013#endif
12014 break;
12015 case XML_PARSER_ENTITY_VALUE:
12016 xmlGenericError(xmlGenericErrorContext,
12017 "PP: internal error, state == ENTITY_VALUE\n");
12018 ctxt->instate = XML_PARSER_CONTENT;
12019#ifdef DEBUG_PUSH
12020 xmlGenericError(xmlGenericErrorContext,
12021 "PP: entering DTD\n");
12022#endif
12023 break;
12024 case XML_PARSER_ATTRIBUTE_VALUE:
12025 xmlGenericError(xmlGenericErrorContext,
12026 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12027 ctxt->instate = XML_PARSER_START_TAG;
12028#ifdef DEBUG_PUSH
12029 xmlGenericError(xmlGenericErrorContext,
12030 "PP: entering START_TAG\n");
12031#endif
12032 break;
12033 case XML_PARSER_SYSTEM_LITERAL:
12034 xmlGenericError(xmlGenericErrorContext,
12035 "PP: internal error, state == SYSTEM_LITERAL\n");
12036 ctxt->instate = XML_PARSER_START_TAG;
12037#ifdef DEBUG_PUSH
12038 xmlGenericError(xmlGenericErrorContext,
12039 "PP: entering START_TAG\n");
12040#endif
12041 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012042 case XML_PARSER_PUBLIC_LITERAL:
12043 xmlGenericError(xmlGenericErrorContext,
12044 "PP: internal error, state == PUBLIC_LITERAL\n");
12045 ctxt->instate = XML_PARSER_START_TAG;
12046#ifdef DEBUG_PUSH
12047 xmlGenericError(xmlGenericErrorContext,
12048 "PP: entering START_TAG\n");
12049#endif
12050 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012051 }
12052 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012053done:
Owen Taylor3473f882001-02-23 17:55:21 +000012054#ifdef DEBUG_PUSH
12055 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12056#endif
12057 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012058encoding_error:
12059 {
12060 char buffer[150];
12061
12062 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12063 ctxt->input->cur[0], ctxt->input->cur[1],
12064 ctxt->input->cur[2], ctxt->input->cur[3]);
12065 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12066 "Input is not proper UTF-8, indicate encoding !\n%s",
12067 BAD_CAST buffer, NULL);
12068 }
12069 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012070}
12071
12072/**
Daniel Veillard65686452012-07-19 18:25:01 +080012073 * xmlParseCheckTransition:
12074 * @ctxt: an XML parser context
12075 * @chunk: a char array
12076 * @size: the size in byte of the chunk
12077 *
12078 * Check depending on the current parser state if the chunk given must be
12079 * processed immediately or one need more data to advance on parsing.
12080 *
12081 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12082 */
12083static int
12084xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12085 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12086 return(-1);
12087 if (ctxt->instate == XML_PARSER_START_TAG) {
12088 if (memchr(chunk, '>', size) != NULL)
12089 return(1);
12090 return(0);
12091 }
12092 if (ctxt->progressive == XML_PARSER_COMMENT) {
12093 if (memchr(chunk, '>', size) != NULL)
12094 return(1);
12095 return(0);
12096 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012097 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12098 if (memchr(chunk, '>', size) != NULL)
12099 return(1);
12100 return(0);
12101 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012102 if (ctxt->progressive == XML_PARSER_PI) {
12103 if (memchr(chunk, '>', size) != NULL)
12104 return(1);
12105 return(0);
12106 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012107 if (ctxt->instate == XML_PARSER_END_TAG) {
12108 if (memchr(chunk, '>', size) != NULL)
12109 return(1);
12110 return(0);
12111 }
12112 if ((ctxt->progressive == XML_PARSER_DTD) ||
12113 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012114 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012115 return(1);
12116 return(0);
12117 }
Daniel Veillard65686452012-07-19 18:25:01 +080012118 return(1);
12119}
12120
12121/**
Owen Taylor3473f882001-02-23 17:55:21 +000012122 * xmlParseChunk:
12123 * @ctxt: an XML parser context
12124 * @chunk: an char array
12125 * @size: the size in byte of the chunk
12126 * @terminate: last chunk indicator
12127 *
12128 * Parse a Chunk of memory
12129 *
12130 * Returns zero if no error, the xmlParserErrors otherwise.
12131 */
12132int
12133xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12134 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012135 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012136 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012137 size_t old_avail = 0;
12138 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012139
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012140 if (ctxt == NULL)
12141 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012142 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012143 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012144 if (ctxt->instate == XML_PARSER_EOF)
12145 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012146 if (ctxt->instate == XML_PARSER_START)
12147 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012148 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12149 (chunk[size - 1] == '\r')) {
12150 end_in_lf = 1;
12151 size--;
12152 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012153
12154xmldecl_done:
12155
Owen Taylor3473f882001-02-23 17:55:21 +000012156 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12157 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012158 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12159 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012160 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012161
Daniel Veillard65686452012-07-19 18:25:01 +080012162 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012163 /*
12164 * Specific handling if we autodetected an encoding, we should not
12165 * push more than the first line ... which depend on the encoding
12166 * And only push the rest once the final encoding was detected
12167 */
12168 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12169 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012170 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012171
12172 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12173 BAD_CAST "UTF-16")) ||
12174 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175 BAD_CAST "UTF16")))
12176 len = 90;
12177 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12178 BAD_CAST "UCS-4")) ||
12179 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180 BAD_CAST "UCS4")))
12181 len = 180;
12182
12183 if (ctxt->input->buf->rawconsumed < len)
12184 len -= ctxt->input->buf->rawconsumed;
12185
Raul Hudeaba9716a2010-03-15 10:13:29 +010012186 /*
12187 * Change size for reading the initial declaration only
12188 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12189 * will blindly copy extra bytes from memory.
12190 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012191 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012192 remain = size - len;
12193 size = len;
12194 } else {
12195 remain = 0;
12196 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012197 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012198 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012199 if (res < 0) {
12200 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012201 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012202 return (XML_PARSER_EOF);
12203 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012204 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012205#ifdef DEBUG_PUSH
12206 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12207#endif
12208
Owen Taylor3473f882001-02-23 17:55:21 +000012209 } else if (ctxt->instate != XML_PARSER_EOF) {
12210 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12211 xmlParserInputBufferPtr in = ctxt->input->buf;
12212 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12213 (in->raw != NULL)) {
12214 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012215 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12216 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012217
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012218 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012219 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012220 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012221 xmlGenericError(xmlGenericErrorContext,
12222 "xmlParseChunk: encoder error\n");
12223 return(XML_ERR_INVALID_ENCODING);
12224 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012225 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012226 }
12227 }
12228 }
Daniel Veillard65686452012-07-19 18:25:01 +080012229 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012230 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012231 } else {
12232 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12233 avail = xmlBufUse(ctxt->input->buf->buffer);
12234 /*
12235 * Depending on the current state it may not be such
12236 * a good idea to try parsing if there is nothing in the chunk
12237 * which would be worth doing a parser state transition and we
12238 * need to wait for more data
12239 */
12240 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12241 (old_avail == 0) || (avail == 0) ||
12242 (xmlParseCheckTransition(ctxt,
12243 (const char *)&ctxt->input->base[old_avail],
12244 avail - old_avail)))
12245 xmlParseTryOrFinish(ctxt, terminate);
12246 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012247 if (ctxt->instate == XML_PARSER_EOF)
12248 return(ctxt->errNo);
12249
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012250 if ((ctxt->input != NULL) &&
12251 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12252 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12253 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12254 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012255 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012256 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012257 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12258 return(ctxt->errNo);
12259
12260 if (remain != 0) {
12261 chunk += size;
12262 size = remain;
12263 remain = 0;
12264 goto xmldecl_done;
12265 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012266 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12267 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012268 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12269 ctxt->input);
12270 size_t current = ctxt->input->cur - ctxt->input->base;
12271
Daniel Veillarda617e242006-01-09 14:38:44 +000012272 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012273
12274 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12275 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012276 }
Owen Taylor3473f882001-02-23 17:55:21 +000012277 if (terminate) {
12278 /*
12279 * Check for termination
12280 */
Daniel Veillard65686452012-07-19 18:25:01 +080012281 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012282
12283 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012284 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012285 cur_avail = ctxt->input->length -
12286 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012287 else
Daniel Veillard65686452012-07-19 18:25:01 +080012288 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12289 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012290 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012291
Owen Taylor3473f882001-02-23 17:55:21 +000012292 if ((ctxt->instate != XML_PARSER_EOF) &&
12293 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012294 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012295 }
Daniel Veillard65686452012-07-19 18:25:01 +080012296 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012297 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012298 }
Owen Taylor3473f882001-02-23 17:55:21 +000012299 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012300 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012301 ctxt->sax->endDocument(ctxt->userData);
12302 }
12303 ctxt->instate = XML_PARSER_EOF;
12304 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012305 if (ctxt->wellFormed == 0)
12306 return((xmlParserErrors) ctxt->errNo);
12307 else
12308 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012309}
12310
12311/************************************************************************
12312 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012313 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012314 * *
12315 ************************************************************************/
12316
12317/**
Owen Taylor3473f882001-02-23 17:55:21 +000012318 * xmlCreatePushParserCtxt:
12319 * @sax: a SAX handler
12320 * @user_data: The user data returned on SAX callbacks
12321 * @chunk: a pointer to an array of chars
12322 * @size: number of chars in the array
12323 * @filename: an optional file name or URI
12324 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012325 * Create a parser context for using the XML parser in push mode.
12326 * If @buffer and @size are non-NULL, the data is used to detect
12327 * the encoding. The remaining characters will be parsed so they
12328 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012329 * To allow content encoding detection, @size should be >= 4
12330 * The value of @filename is used for fetching external entities
12331 * and error/warning reports.
12332 *
12333 * Returns the new parser context or NULL
12334 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012335
Owen Taylor3473f882001-02-23 17:55:21 +000012336xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012337xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012338 const char *chunk, int size, const char *filename) {
12339 xmlParserCtxtPtr ctxt;
12340 xmlParserInputPtr inputStream;
12341 xmlParserInputBufferPtr buf;
12342 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12343
12344 /*
12345 * plug some encoding conversion routines
12346 */
12347 if ((chunk != NULL) && (size >= 4))
12348 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12349
12350 buf = xmlAllocParserInputBuffer(enc);
12351 if (buf == NULL) return(NULL);
12352
12353 ctxt = xmlNewParserCtxt();
12354 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012355 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012356 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012357 return(NULL);
12358 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012359 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012360 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12361 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012362 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012363 xmlFreeParserInputBuffer(buf);
12364 xmlFreeParserCtxt(ctxt);
12365 return(NULL);
12366 }
Owen Taylor3473f882001-02-23 17:55:21 +000012367 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012368#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012369 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012370#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012371 xmlFree(ctxt->sax);
12372 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12373 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012374 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012375 xmlFreeParserInputBuffer(buf);
12376 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012377 return(NULL);
12378 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012379 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12380 if (sax->initialized == XML_SAX2_MAGIC)
12381 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12382 else
12383 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012384 if (user_data != NULL)
12385 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012386 }
Owen Taylor3473f882001-02-23 17:55:21 +000012387 if (filename == NULL) {
12388 ctxt->directory = NULL;
12389 } else {
12390 ctxt->directory = xmlParserGetDirectory(filename);
12391 }
12392
12393 inputStream = xmlNewInputStream(ctxt);
12394 if (inputStream == NULL) {
12395 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012396 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012397 return(NULL);
12398 }
12399
12400 if (filename == NULL)
12401 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012402 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012403 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012404 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012405 if (inputStream->filename == NULL) {
12406 xmlFreeParserCtxt(ctxt);
12407 xmlFreeParserInputBuffer(buf);
12408 return(NULL);
12409 }
12410 }
Owen Taylor3473f882001-02-23 17:55:21 +000012411 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012412 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012413 inputPush(ctxt, inputStream);
12414
William M. Brack3a1cd212005-02-11 14:35:54 +000012415 /*
12416 * If the caller didn't provide an initial 'chunk' for determining
12417 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12418 * that it can be automatically determined later
12419 */
12420 if ((size == 0) || (chunk == NULL)) {
12421 ctxt->charset = XML_CHAR_ENCODING_NONE;
12422 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012423 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12424 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012425
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012426 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012427
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012428 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012429#ifdef DEBUG_PUSH
12430 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12431#endif
12432 }
12433
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012434 if (enc != XML_CHAR_ENCODING_NONE) {
12435 xmlSwitchEncoding(ctxt, enc);
12436 }
12437
Owen Taylor3473f882001-02-23 17:55:21 +000012438 return(ctxt);
12439}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012440#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012441
12442/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012443 * xmlHaltParser:
12444 * @ctxt: an XML parser context
12445 *
12446 * Blocks further parser processing don't override error
12447 * for internal use
12448 */
12449static void
12450xmlHaltParser(xmlParserCtxtPtr ctxt) {
12451 if (ctxt == NULL)
12452 return;
12453 ctxt->instate = XML_PARSER_EOF;
12454 ctxt->disableSAX = 1;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012455 while (ctxt->inputNr > 1)
12456 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012457 if (ctxt->input != NULL) {
12458 /*
12459 * in case there was a specific allocation deallocate before
12460 * overriding base
12461 */
12462 if (ctxt->input->free != NULL) {
12463 ctxt->input->free((xmlChar *) ctxt->input->base);
12464 ctxt->input->free = NULL;
12465 }
12466 ctxt->input->cur = BAD_CAST"";
12467 ctxt->input->base = ctxt->input->cur;
Nick Wellnhofer24246c72017-06-20 12:56:36 +020012468 ctxt->input->end = ctxt->input->cur;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012469 }
12470}
12471
12472/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012473 * xmlStopParser:
12474 * @ctxt: an XML parser context
12475 *
12476 * Blocks further parser processing
12477 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012478void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012479xmlStopParser(xmlParserCtxtPtr ctxt) {
12480 if (ctxt == NULL)
12481 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012482 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012483 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012484}
12485
12486/**
Owen Taylor3473f882001-02-23 17:55:21 +000012487 * xmlCreateIOParserCtxt:
12488 * @sax: a SAX handler
12489 * @user_data: The user data returned on SAX callbacks
12490 * @ioread: an I/O read function
12491 * @ioclose: an I/O close function
12492 * @ioctx: an I/O handler
12493 * @enc: the charset encoding if known
12494 *
12495 * Create a parser context for using the XML parser with an existing
12496 * I/O stream
12497 *
12498 * Returns the new parser context or NULL
12499 */
12500xmlParserCtxtPtr
12501xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12502 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12503 void *ioctx, xmlCharEncoding enc) {
12504 xmlParserCtxtPtr ctxt;
12505 xmlParserInputPtr inputStream;
12506 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012507
Daniel Veillard42595322004-11-08 10:52:06 +000012508 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012509
12510 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012511 if (buf == NULL) {
12512 if (ioclose != NULL)
12513 ioclose(ioctx);
12514 return (NULL);
12515 }
Owen Taylor3473f882001-02-23 17:55:21 +000012516
12517 ctxt = xmlNewParserCtxt();
12518 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012519 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012520 return(NULL);
12521 }
12522 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012523#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012524 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012525#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012526 xmlFree(ctxt->sax);
12527 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12528 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012529 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012530 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012531 return(NULL);
12532 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012533 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12534 if (sax->initialized == XML_SAX2_MAGIC)
12535 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12536 else
12537 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012538 if (user_data != NULL)
12539 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012540 }
Owen Taylor3473f882001-02-23 17:55:21 +000012541
12542 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12543 if (inputStream == NULL) {
12544 xmlFreeParserCtxt(ctxt);
12545 return(NULL);
12546 }
12547 inputPush(ctxt, inputStream);
12548
12549 return(ctxt);
12550}
12551
Daniel Veillard4432df22003-09-28 18:58:27 +000012552#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012553/************************************************************************
12554 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012555 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012556 * *
12557 ************************************************************************/
12558
12559/**
12560 * xmlIOParseDTD:
12561 * @sax: the SAX handler block or NULL
12562 * @input: an Input Buffer
12563 * @enc: the charset encoding if known
12564 *
12565 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012566 *
Owen Taylor3473f882001-02-23 17:55:21 +000012567 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012568 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012569 */
12570
12571xmlDtdPtr
12572xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12573 xmlCharEncoding enc) {
12574 xmlDtdPtr ret = NULL;
12575 xmlParserCtxtPtr ctxt;
12576 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012577 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012578
12579 if (input == NULL)
12580 return(NULL);
12581
12582 ctxt = xmlNewParserCtxt();
12583 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012584 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012585 return(NULL);
12586 }
12587
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012588 /* We are loading a DTD */
12589 ctxt->options |= XML_PARSE_DTDLOAD;
12590
Owen Taylor3473f882001-02-23 17:55:21 +000012591 /*
12592 * Set-up the SAX context
12593 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012594 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012595 if (ctxt->sax != NULL)
12596 xmlFree(ctxt->sax);
12597 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012598 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012599 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012600 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012601
12602 /*
12603 * generate a parser input from the I/O handler
12604 */
12605
Daniel Veillard43caefb2003-12-07 19:32:22 +000012606 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012607 if (pinput == NULL) {
12608 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012609 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012610 xmlFreeParserCtxt(ctxt);
12611 return(NULL);
12612 }
12613
12614 /*
12615 * plug some encoding conversion routines here.
12616 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012617 if (xmlPushInput(ctxt, pinput) < 0) {
12618 if (sax != NULL) ctxt->sax = NULL;
12619 xmlFreeParserCtxt(ctxt);
12620 return(NULL);
12621 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012622 if (enc != XML_CHAR_ENCODING_NONE) {
12623 xmlSwitchEncoding(ctxt, enc);
12624 }
Owen Taylor3473f882001-02-23 17:55:21 +000012625
12626 pinput->filename = NULL;
12627 pinput->line = 1;
12628 pinput->col = 1;
12629 pinput->base = ctxt->input->cur;
12630 pinput->cur = ctxt->input->cur;
12631 pinput->free = NULL;
12632
12633 /*
12634 * let's parse that entity knowing it's an external subset.
12635 */
12636 ctxt->inSubset = 2;
12637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012638 if (ctxt->myDoc == NULL) {
12639 xmlErrMemory(ctxt, "New Doc failed");
12640 return(NULL);
12641 }
12642 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012643 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12644 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012645
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012646 if ((enc == XML_CHAR_ENCODING_NONE) &&
12647 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012648 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012649 * Get the 4 first bytes and decode the charset
12650 * if enc != XML_CHAR_ENCODING_NONE
12651 * plug some encoding conversion routines.
12652 */
12653 start[0] = RAW;
12654 start[1] = NXT(1);
12655 start[2] = NXT(2);
12656 start[3] = NXT(3);
12657 enc = xmlDetectCharEncoding(start, 4);
12658 if (enc != XML_CHAR_ENCODING_NONE) {
12659 xmlSwitchEncoding(ctxt, enc);
12660 }
12661 }
12662
Owen Taylor3473f882001-02-23 17:55:21 +000012663 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12664
12665 if (ctxt->myDoc != NULL) {
12666 if (ctxt->wellFormed) {
12667 ret = ctxt->myDoc->extSubset;
12668 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012669 if (ret != NULL) {
12670 xmlNodePtr tmp;
12671
12672 ret->doc = NULL;
12673 tmp = ret->children;
12674 while (tmp != NULL) {
12675 tmp->doc = NULL;
12676 tmp = tmp->next;
12677 }
12678 }
Owen Taylor3473f882001-02-23 17:55:21 +000012679 } else {
12680 ret = NULL;
12681 }
12682 xmlFreeDoc(ctxt->myDoc);
12683 ctxt->myDoc = NULL;
12684 }
12685 if (sax != NULL) ctxt->sax = NULL;
12686 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012687
Owen Taylor3473f882001-02-23 17:55:21 +000012688 return(ret);
12689}
12690
12691/**
12692 * xmlSAXParseDTD:
12693 * @sax: the SAX handler block
12694 * @ExternalID: a NAME* containing the External ID of the DTD
12695 * @SystemID: a NAME* containing the URL to the DTD
12696 *
12697 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012698 *
Owen Taylor3473f882001-02-23 17:55:21 +000012699 * Returns the resulting xmlDtdPtr or NULL in case of error.
12700 */
12701
12702xmlDtdPtr
12703xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12704 const xmlChar *SystemID) {
12705 xmlDtdPtr ret = NULL;
12706 xmlParserCtxtPtr ctxt;
12707 xmlParserInputPtr input = NULL;
12708 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012709 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012710
12711 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12712
12713 ctxt = xmlNewParserCtxt();
12714 if (ctxt == NULL) {
12715 return(NULL);
12716 }
12717
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012718 /* We are loading a DTD */
12719 ctxt->options |= XML_PARSE_DTDLOAD;
12720
Owen Taylor3473f882001-02-23 17:55:21 +000012721 /*
12722 * Set-up the SAX context
12723 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012724 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012725 if (ctxt->sax != NULL)
12726 xmlFree(ctxt->sax);
12727 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012728 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012729 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012730
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012731 /*
12732 * Canonicalise the system ID
12733 */
12734 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012735 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012736 xmlFreeParserCtxt(ctxt);
12737 return(NULL);
12738 }
Owen Taylor3473f882001-02-23 17:55:21 +000012739
12740 /*
12741 * Ask the Entity resolver to load the damn thing
12742 */
12743
12744 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012745 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12746 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012747 if (input == NULL) {
12748 if (sax != NULL) ctxt->sax = NULL;
12749 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012750 if (systemIdCanonic != NULL)
12751 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012752 return(NULL);
12753 }
12754
12755 /*
12756 * plug some encoding conversion routines here.
12757 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012758 if (xmlPushInput(ctxt, input) < 0) {
12759 if (sax != NULL) ctxt->sax = NULL;
12760 xmlFreeParserCtxt(ctxt);
12761 if (systemIdCanonic != NULL)
12762 xmlFree(systemIdCanonic);
12763 return(NULL);
12764 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012765 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12766 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12767 xmlSwitchEncoding(ctxt, enc);
12768 }
Owen Taylor3473f882001-02-23 17:55:21 +000012769
12770 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012771 input->filename = (char *) systemIdCanonic;
12772 else
12773 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012774 input->line = 1;
12775 input->col = 1;
12776 input->base = ctxt->input->cur;
12777 input->cur = ctxt->input->cur;
12778 input->free = NULL;
12779
12780 /*
12781 * let's parse that entity knowing it's an external subset.
12782 */
12783 ctxt->inSubset = 2;
12784 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012785 if (ctxt->myDoc == NULL) {
12786 xmlErrMemory(ctxt, "New Doc failed");
12787 if (sax != NULL) ctxt->sax = NULL;
12788 xmlFreeParserCtxt(ctxt);
12789 return(NULL);
12790 }
12791 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012792 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793 ExternalID, SystemID);
12794 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12795
12796 if (ctxt->myDoc != NULL) {
12797 if (ctxt->wellFormed) {
12798 ret = ctxt->myDoc->extSubset;
12799 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012800 if (ret != NULL) {
12801 xmlNodePtr tmp;
12802
12803 ret->doc = NULL;
12804 tmp = ret->children;
12805 while (tmp != NULL) {
12806 tmp->doc = NULL;
12807 tmp = tmp->next;
12808 }
12809 }
Owen Taylor3473f882001-02-23 17:55:21 +000012810 } else {
12811 ret = NULL;
12812 }
12813 xmlFreeDoc(ctxt->myDoc);
12814 ctxt->myDoc = NULL;
12815 }
12816 if (sax != NULL) ctxt->sax = NULL;
12817 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012818
Owen Taylor3473f882001-02-23 17:55:21 +000012819 return(ret);
12820}
12821
Daniel Veillard4432df22003-09-28 18:58:27 +000012822
Owen Taylor3473f882001-02-23 17:55:21 +000012823/**
12824 * xmlParseDTD:
12825 * @ExternalID: a NAME* containing the External ID of the DTD
12826 * @SystemID: a NAME* containing the URL to the DTD
12827 *
12828 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012829 *
Owen Taylor3473f882001-02-23 17:55:21 +000012830 * Returns the resulting xmlDtdPtr or NULL in case of error.
12831 */
12832
12833xmlDtdPtr
12834xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12835 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12836}
Daniel Veillard4432df22003-09-28 18:58:27 +000012837#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012838
/************************************************************************
 *                                                                      *
 *              Front ends when parsing an Entity                       *
 *                                                                      *
 ************************************************************************/
12844
12845/**
Owen Taylor3473f882001-02-23 17:55:21 +000012846 * xmlParseCtxtExternalEntity:
12847 * @ctx: the existing parsing context
12848 * @URL: the URL for the entity to load
12849 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012850 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012851 *
12852 * Parse an external general entity within an existing parsing context
12853 * An external general parsed entity is well-formed if it matches the
12854 * production labeled extParsedEnt.
12855 *
12856 * [78] extParsedEnt ::= TextDecl? content
12857 *
12858 * Returns 0 if the entity is well formed, -1 in case of args problem and
12859 * the parser error code otherwise
12860 */
12861
12862int
12863xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012864 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012865 xmlParserCtxtPtr ctxt;
12866 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012867 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012868 xmlSAXHandlerPtr oldsax = NULL;
12869 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012870 xmlChar start[4];
12871 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012872
Daniel Veillardce682bc2004-11-05 17:22:25 +000012873 if (ctx == NULL) return(-1);
12874
Daniel Veillard0161e632008-08-28 15:36:32 +000012875 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12876 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012877 return(XML_ERR_ENTITY_LOOP);
12878 }
12879
Daniel Veillardcda96922001-08-21 10:56:31 +000012880 if (lst != NULL)
12881 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012882 if ((URL == NULL) && (ID == NULL))
12883 return(-1);
12884 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12885 return(-1);
12886
Rob Richards798743a2009-06-19 13:54:25 -040012887 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012888 if (ctxt == NULL) {
12889 return(-1);
12890 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012891
Owen Taylor3473f882001-02-23 17:55:21 +000012892 oldsax = ctxt->sax;
12893 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012894 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012895 newDoc = xmlNewDoc(BAD_CAST "1.0");
12896 if (newDoc == NULL) {
12897 xmlFreeParserCtxt(ctxt);
12898 return(-1);
12899 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012900 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012901 if (ctx->myDoc->dict) {
12902 newDoc->dict = ctx->myDoc->dict;
12903 xmlDictReference(newDoc->dict);
12904 }
Owen Taylor3473f882001-02-23 17:55:21 +000012905 if (ctx->myDoc != NULL) {
12906 newDoc->intSubset = ctx->myDoc->intSubset;
12907 newDoc->extSubset = ctx->myDoc->extSubset;
12908 }
12909 if (ctx->myDoc->URL != NULL) {
12910 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12911 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012912 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12913 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012914 ctxt->sax = oldsax;
12915 xmlFreeParserCtxt(ctxt);
12916 newDoc->intSubset = NULL;
12917 newDoc->extSubset = NULL;
12918 xmlFreeDoc(newDoc);
12919 return(-1);
12920 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012921 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012922 nodePush(ctxt, newDoc->children);
12923 if (ctx->myDoc == NULL) {
12924 ctxt->myDoc = newDoc;
12925 } else {
12926 ctxt->myDoc = ctx->myDoc;
12927 newDoc->children->doc = ctx->myDoc;
12928 }
12929
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012930 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012931 * Get the 4 first bytes and decode the charset
12932 * if enc != XML_CHAR_ENCODING_NONE
12933 * plug some encoding conversion routines.
12934 */
12935 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012936 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12937 start[0] = RAW;
12938 start[1] = NXT(1);
12939 start[2] = NXT(2);
12940 start[3] = NXT(3);
12941 enc = xmlDetectCharEncoding(start, 4);
12942 if (enc != XML_CHAR_ENCODING_NONE) {
12943 xmlSwitchEncoding(ctxt, enc);
12944 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012945 }
12946
Owen Taylor3473f882001-02-23 17:55:21 +000012947 /*
12948 * Parse a possible text declaration first
12949 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012950 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012951 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012952 /*
12953 * An XML-1.0 document can't reference an entity not XML-1.0
12954 */
12955 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12956 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012957 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012958 "Version mismatch between document and entity\n");
12959 }
Owen Taylor3473f882001-02-23 17:55:21 +000012960 }
12961
12962 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012963 * If the user provided its own SAX callbacks then reuse the
12964 * useData callback field, otherwise the expected setup in a
12965 * DOM builder is to have userData == ctxt
12966 */
12967 if (ctx->userData == ctx)
12968 ctxt->userData = ctxt;
12969 else
12970 ctxt->userData = ctx->userData;
12971
12972 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012973 * Doing validity checking on chunk doesn't make sense
12974 */
12975 ctxt->instate = XML_PARSER_CONTENT;
12976 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012977 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012978 ctxt->loadsubset = ctx->loadsubset;
12979 ctxt->depth = ctx->depth + 1;
12980 ctxt->replaceEntities = ctx->replaceEntities;
12981 if (ctxt->validate) {
12982 ctxt->vctxt.error = ctx->vctxt.error;
12983 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012984 } else {
12985 ctxt->vctxt.error = NULL;
12986 ctxt->vctxt.warning = NULL;
12987 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012988 ctxt->vctxt.nodeTab = NULL;
12989 ctxt->vctxt.nodeNr = 0;
12990 ctxt->vctxt.nodeMax = 0;
12991 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012992 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12993 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012994 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12995 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12996 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012997 ctxt->dictNames = ctx->dictNames;
12998 ctxt->attsDefault = ctx->attsDefault;
12999 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013000 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013001
13002 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013003
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013004 ctx->validate = ctxt->validate;
13005 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013006 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013007 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013008 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013009 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013010 }
13011 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013012 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013013 }
13014
13015 if (!ctxt->wellFormed) {
13016 if (ctxt->errNo == 0)
13017 ret = 1;
13018 else
13019 ret = ctxt->errNo;
13020 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013021 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013022 xmlNodePtr cur;
13023
13024 /*
13025 * Return the newly created nodeset after unlinking it from
13026 * they pseudo parent.
13027 */
13028 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013029 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013030 while (cur != NULL) {
13031 cur->parent = NULL;
13032 cur = cur->next;
13033 }
13034 newDoc->children->children = NULL;
13035 }
13036 ret = 0;
13037 }
13038 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013039 ctxt->dict = NULL;
13040 ctxt->attsDefault = NULL;
13041 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013042 xmlFreeParserCtxt(ctxt);
13043 newDoc->intSubset = NULL;
13044 newDoc->extSubset = NULL;
13045 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013046
Owen Taylor3473f882001-02-23 17:55:21 +000013047 return(ret);
13048}
13049
13050/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013051 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013052 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013053 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013054 * @sax: the SAX handler bloc (possibly NULL)
13055 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13056 * @depth: Used for loop detection, use 0
13057 * @URL: the URL for the entity to load
13058 * @ID: the System ID for the entity to load
13059 * @list: the return value for the set of parsed nodes
13060 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013061 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013062 *
13063 * Returns 0 if the entity is well formed, -1 in case of args problem and
13064 * the parser error code otherwise
13065 */
13066
Daniel Veillard7d515752003-09-26 19:12:37 +000013067static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013068xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13069 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013070 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013071 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013072 xmlParserCtxtPtr ctxt;
13073 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013074 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013075 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013076 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013077 xmlChar start[4];
13078 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013079
Daniel Veillard0161e632008-08-28 15:36:32 +000013080 if (((depth > 40) &&
13081 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13082 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013083 return(XML_ERR_ENTITY_LOOP);
13084 }
13085
Owen Taylor3473f882001-02-23 17:55:21 +000013086 if (list != NULL)
13087 *list = NULL;
13088 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013089 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013090 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013091 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013092
13093
Rob Richards9c0aa472009-03-26 18:10:19 +000013094 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013095 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013096 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013097 if (oldctxt != NULL) {
13098 ctxt->_private = oldctxt->_private;
13099 ctxt->loadsubset = oldctxt->loadsubset;
13100 ctxt->validate = oldctxt->validate;
13101 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013102 ctxt->record_info = oldctxt->record_info;
13103 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13104 ctxt->node_seq.length = oldctxt->node_seq.length;
13105 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013106 } else {
13107 /*
13108 * Doing validity checking on chunk without context
13109 * doesn't make sense
13110 */
13111 ctxt->_private = NULL;
13112 ctxt->validate = 0;
13113 ctxt->external = 2;
13114 ctxt->loadsubset = 0;
13115 }
Owen Taylor3473f882001-02-23 17:55:21 +000013116 if (sax != NULL) {
13117 oldsax = ctxt->sax;
13118 ctxt->sax = sax;
13119 if (user_data != NULL)
13120 ctxt->userData = user_data;
13121 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013122 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013123 newDoc = xmlNewDoc(BAD_CAST "1.0");
13124 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013125 ctxt->node_seq.maximum = 0;
13126 ctxt->node_seq.length = 0;
13127 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013128 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013129 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013130 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013131 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013132 newDoc->intSubset = doc->intSubset;
13133 newDoc->extSubset = doc->extSubset;
13134 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013135 xmlDictReference(newDoc->dict);
13136
Owen Taylor3473f882001-02-23 17:55:21 +000013137 if (doc->URL != NULL) {
13138 newDoc->URL = xmlStrdup(doc->URL);
13139 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013140 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13141 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013142 if (sax != NULL)
13143 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013144 ctxt->node_seq.maximum = 0;
13145 ctxt->node_seq.length = 0;
13146 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013147 xmlFreeParserCtxt(ctxt);
13148 newDoc->intSubset = NULL;
13149 newDoc->extSubset = NULL;
13150 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013151 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013152 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013153 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013154 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013155 ctxt->myDoc = doc;
13156 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013157
Daniel Veillard0161e632008-08-28 15:36:32 +000013158 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013159 * Get the 4 first bytes and decode the charset
13160 * if enc != XML_CHAR_ENCODING_NONE
13161 * plug some encoding conversion routines.
13162 */
13163 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013164 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13165 start[0] = RAW;
13166 start[1] = NXT(1);
13167 start[2] = NXT(2);
13168 start[3] = NXT(3);
13169 enc = xmlDetectCharEncoding(start, 4);
13170 if (enc != XML_CHAR_ENCODING_NONE) {
13171 xmlSwitchEncoding(ctxt, enc);
13172 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013173 }
13174
Owen Taylor3473f882001-02-23 17:55:21 +000013175 /*
13176 * Parse a possible text declaration first
13177 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013178 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013179 xmlParseTextDecl(ctxt);
13180 }
13181
Owen Taylor3473f882001-02-23 17:55:21 +000013182 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013183 ctxt->depth = depth;
13184
13185 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013186
Daniel Veillard561b7f82002-03-20 21:55:57 +000013187 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013188 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013189 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013190 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013191 }
13192 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013193 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013194 }
13195
13196 if (!ctxt->wellFormed) {
13197 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013198 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013199 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013200 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013201 } else {
13202 if (list != NULL) {
13203 xmlNodePtr cur;
13204
13205 /*
13206 * Return the newly created nodeset after unlinking it from
13207 * they pseudo parent.
13208 */
13209 cur = newDoc->children->children;
13210 *list = cur;
13211 while (cur != NULL) {
13212 cur->parent = NULL;
13213 cur = cur->next;
13214 }
13215 newDoc->children->children = NULL;
13216 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013217 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013218 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013219
13220 /*
13221 * Record in the parent context the number of entities replacement
13222 * done when parsing that reference.
13223 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013224 if (oldctxt != NULL)
13225 oldctxt->nbentities += ctxt->nbentities;
13226
Daniel Veillard0161e632008-08-28 15:36:32 +000013227 /*
13228 * Also record the size of the entity parsed
13229 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013230 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013231 oldctxt->sizeentities += ctxt->input->consumed;
13232 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13233 }
13234 /*
13235 * And record the last error if any
13236 */
Nick Wellnhofer3eef3f32017-06-20 16:13:57 +020013237 if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
Daniel Veillard0161e632008-08-28 15:36:32 +000013238 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13239
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013240 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013241 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013242 if (oldctxt != NULL) {
13243 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13244 oldctxt->node_seq.length = ctxt->node_seq.length;
13245 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13246 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013247 ctxt->node_seq.maximum = 0;
13248 ctxt->node_seq.length = 0;
13249 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013250 xmlFreeParserCtxt(ctxt);
13251 newDoc->intSubset = NULL;
13252 newDoc->extSubset = NULL;
13253 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013254
Owen Taylor3473f882001-02-23 17:55:21 +000013255 return(ret);
13256}
13257
Daniel Veillard81273902003-09-30 00:43:48 +000013258#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013259/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013260 * xmlParseExternalEntity:
13261 * @doc: the document the chunk pertains to
13262 * @sax: the SAX handler bloc (possibly NULL)
13263 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13264 * @depth: Used for loop detection, use 0
13265 * @URL: the URL for the entity to load
13266 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013267 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013268 *
13269 * Parse an external general entity
13270 * An external general parsed entity is well-formed if it matches the
13271 * production labeled extParsedEnt.
13272 *
13273 * [78] extParsedEnt ::= TextDecl? content
13274 *
13275 * Returns 0 if the entity is well formed, -1 in case of args problem and
13276 * the parser error code otherwise
13277 */
13278
13279int
13280xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013281 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013282 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013283 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013284}
13285
13286/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013287 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013288 * @doc: the document the chunk pertains to
13289 * @sax: the SAX handler bloc (possibly NULL)
13290 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13291 * @depth: Used for loop detection, use 0
13292 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013293 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013294 *
13295 * Parse a well-balanced chunk of an XML document
13296 * called by the parser
13297 * The allowed sequence for the Well Balanced Chunk is the one defined by
13298 * the content production in the XML grammar:
13299 *
13300 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13301 *
13302 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13303 * the parser error code otherwise
13304 */
13305
13306int
13307xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013308 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013309 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13310 depth, string, lst, 0 );
13311}
Daniel Veillard81273902003-09-30 00:43:48 +000013312#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013313
13314/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013315 * xmlParseBalancedChunkMemoryInternal:
13316 * @oldctxt: the existing parsing context
13317 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13318 * @user_data: the user data field for the parser context
13319 * @lst: the return value for the set of parsed nodes
13320 *
13321 *
13322 * Parse a well-balanced chunk of an XML document
13323 * called by the parser
13324 * The allowed sequence for the Well Balanced Chunk is the one defined by
13325 * the content production in the XML grammar:
13326 *
13327 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13328 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013329 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13330 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013331 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013332 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013333 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013334 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013335static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013336xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13337 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13338 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013339 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013340 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013341 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013342 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013343 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013344 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013345 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013346#ifdef SAX2
13347 int i;
13348#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013349
Daniel Veillard0161e632008-08-28 15:36:32 +000013350 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13351 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013352 return(XML_ERR_ENTITY_LOOP);
13353 }
13354
13355
13356 if (lst != NULL)
13357 *lst = NULL;
13358 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013359 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013360
13361 size = xmlStrlen(string);
13362
13363 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013364 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013365 if (user_data != NULL)
13366 ctxt->userData = user_data;
13367 else
13368 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013369 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13370 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013371 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13372 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13373 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013374
Daniel Veillard74eaec12009-08-26 15:57:20 +020013375#ifdef SAX2
13376 /* propagate namespaces down the entity */
13377 for (i = 0;i < oldctxt->nsNr;i += 2) {
13378 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13379 }
13380#endif
13381
Daniel Veillard328f48c2002-11-15 15:24:34 +000013382 oldsax = ctxt->sax;
13383 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013384 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013385 ctxt->replaceEntities = oldctxt->replaceEntities;
13386 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013387
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013388 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013389 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013390 newDoc = xmlNewDoc(BAD_CAST "1.0");
13391 if (newDoc == NULL) {
13392 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013393 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013394 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013395 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013396 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013397 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013398 newDoc->dict = ctxt->dict;
13399 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013400 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013401 } else {
13402 ctxt->myDoc = oldctxt->myDoc;
13403 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013404 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013405 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013406 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13407 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013408 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013409 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013410 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013411 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013412 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013413 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013414 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013415 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013416 ctxt->myDoc->children = NULL;
13417 ctxt->myDoc->last = NULL;
13418 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013419 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013420 ctxt->instate = XML_PARSER_CONTENT;
13421 ctxt->depth = oldctxt->depth + 1;
13422
Daniel Veillard328f48c2002-11-15 15:24:34 +000013423 ctxt->validate = 0;
13424 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013425 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13426 /*
13427 * ID/IDREF registration will be done in xmlValidateElement below
13428 */
13429 ctxt->loadsubset |= XML_SKIP_IDS;
13430 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013431 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013432 ctxt->attsDefault = oldctxt->attsDefault;
13433 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013434
Daniel Veillard68e9e742002-11-16 15:35:11 +000013435 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013436 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013437 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013438 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013439 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013440 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013441 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013442 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013443 }
13444
13445 if (!ctxt->wellFormed) {
13446 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013447 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013448 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013449 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013450 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013451 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013452 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013453
William M. Brack7b9154b2003-09-27 19:23:50 +000013454 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013455 xmlNodePtr cur;
13456
13457 /*
13458 * Return the newly created nodeset after unlinking it from
13459 * they pseudo parent.
13460 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013461 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013462 *lst = cur;
13463 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013464#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013465 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13466 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13467 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013468 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13469 oldctxt->myDoc, cur);
13470 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013471#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013472 cur->parent = NULL;
13473 cur = cur->next;
13474 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013475 ctxt->myDoc->children->children = NULL;
13476 }
13477 if (ctxt->myDoc != NULL) {
13478 xmlFreeNode(ctxt->myDoc->children);
13479 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013480 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013481 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013482
13483 /*
13484 * Record in the parent context the number of entities replacement
13485 * done when parsing that reference.
13486 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013487 if (oldctxt != NULL)
13488 oldctxt->nbentities += ctxt->nbentities;
13489
Daniel Veillard0161e632008-08-28 15:36:32 +000013490 /*
13491 * Also record the last error if any
13492 */
13493 if (ctxt->lastError.code != XML_ERR_OK)
13494 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13495
Daniel Veillard328f48c2002-11-15 15:24:34 +000013496 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013497 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013498 ctxt->attsDefault = NULL;
13499 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013500 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013501 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013502 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013503 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013504
Daniel Veillard328f48c2002-11-15 15:24:34 +000013505 return(ret);
13506}
13507
Daniel Veillard29b17482004-08-16 00:39:03 +000013508/**
13509 * xmlParseInNodeContext:
13510 * @node: the context node
13511 * @data: the input string
13512 * @datalen: the input string length in bytes
13513 * @options: a combination of xmlParserOption
13514 * @lst: the return value for the set of parsed nodes
13515 *
13516 * Parse a well-balanced chunk of an XML document
13517 * within the context (DTD, namespaces, etc ...) of the given node.
13518 *
13519 * The allowed sequence for the data is a Well Balanced Chunk defined by
13520 * the content production in the XML grammar:
13521 *
13522 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13523 *
13524 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13525 * error code otherwise
13526 */
13527xmlParserErrors
13528xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13529 int options, xmlNodePtr *lst) {
13530#ifdef SAX2
13531 xmlParserCtxtPtr ctxt;
13532 xmlDocPtr doc = NULL;
13533 xmlNodePtr fake, cur;
13534 int nsnr = 0;
13535
13536 xmlParserErrors ret = XML_ERR_OK;
13537
13538 /*
13539 * check all input parameters, grab the document
13540 */
13541 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13542 return(XML_ERR_INTERNAL_ERROR);
13543 switch (node->type) {
13544 case XML_ELEMENT_NODE:
13545 case XML_ATTRIBUTE_NODE:
13546 case XML_TEXT_NODE:
13547 case XML_CDATA_SECTION_NODE:
13548 case XML_ENTITY_REF_NODE:
13549 case XML_PI_NODE:
13550 case XML_COMMENT_NODE:
13551 case XML_DOCUMENT_NODE:
13552 case XML_HTML_DOCUMENT_NODE:
13553 break;
13554 default:
13555 return(XML_ERR_INTERNAL_ERROR);
13556
13557 }
13558 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13559 (node->type != XML_DOCUMENT_NODE) &&
13560 (node->type != XML_HTML_DOCUMENT_NODE))
13561 node = node->parent;
13562 if (node == NULL)
13563 return(XML_ERR_INTERNAL_ERROR);
13564 if (node->type == XML_ELEMENT_NODE)
13565 doc = node->doc;
13566 else
13567 doc = (xmlDocPtr) node;
13568 if (doc == NULL)
13569 return(XML_ERR_INTERNAL_ERROR);
13570
13571 /*
13572 * allocate a context and set-up everything not related to the
13573 * node position in the tree
13574 */
13575 if (doc->type == XML_DOCUMENT_NODE)
13576 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13577#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013578 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013579 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013580 /*
13581 * When parsing in context, it makes no sense to add implied
13582 * elements like html/body/etc...
13583 */
13584 options |= HTML_PARSE_NOIMPLIED;
13585 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013586#endif
13587 else
13588 return(XML_ERR_INTERNAL_ERROR);
13589
13590 if (ctxt == NULL)
13591 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013592
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013593 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013594 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13595 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13596 * we must wait until the last moment to free the original one.
13597 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013598 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013599 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013600 xmlDictFree(ctxt->dict);
13601 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013602 } else
13603 options |= XML_PARSE_NODICT;
13604
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013605 if (doc->encoding != NULL) {
13606 xmlCharEncodingHandlerPtr hdlr;
13607
13608 if (ctxt->encoding != NULL)
13609 xmlFree((xmlChar *) ctxt->encoding);
13610 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13611
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013612 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013613 if (hdlr != NULL) {
13614 xmlSwitchToEncoding(ctxt, hdlr);
13615 } else {
13616 return(XML_ERR_UNSUPPORTED_ENCODING);
13617 }
13618 }
13619
Daniel Veillard37334572008-07-31 08:20:02 +000013620 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013621 xmlDetectSAX2(ctxt);
13622 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013623 /* parsing in context, i.e. as within existing content */
13624 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013625
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013626 fake = xmlNewComment(NULL);
13627 if (fake == NULL) {
13628 xmlFreeParserCtxt(ctxt);
13629 return(XML_ERR_NO_MEMORY);
13630 }
13631 xmlAddChild(node, fake);
13632
Daniel Veillard29b17482004-08-16 00:39:03 +000013633 if (node->type == XML_ELEMENT_NODE) {
13634 nodePush(ctxt, node);
13635 /*
13636 * initialize the SAX2 namespaces stack
13637 */
13638 cur = node;
13639 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13640 xmlNsPtr ns = cur->nsDef;
13641 const xmlChar *iprefix, *ihref;
13642
13643 while (ns != NULL) {
13644 if (ctxt->dict) {
13645 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13646 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13647 } else {
13648 iprefix = ns->prefix;
13649 ihref = ns->href;
13650 }
13651
13652 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13653 nsPush(ctxt, iprefix, ihref);
13654 nsnr++;
13655 }
13656 ns = ns->next;
13657 }
13658 cur = cur->parent;
13659 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013660 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013661
13662 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13663 /*
13664 * ID/IDREF registration will be done in xmlValidateElement below
13665 */
13666 ctxt->loadsubset |= XML_SKIP_IDS;
13667 }
13668
Daniel Veillard499cc922006-01-18 17:22:35 +000013669#ifdef LIBXML_HTML_ENABLED
13670 if (doc->type == XML_HTML_DOCUMENT_NODE)
13671 __htmlParseContent(ctxt);
13672 else
13673#endif
13674 xmlParseContent(ctxt);
13675
Daniel Veillard29b17482004-08-16 00:39:03 +000013676 nsPop(ctxt, nsnr);
13677 if ((RAW == '<') && (NXT(1) == '/')) {
13678 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13679 } else if (RAW != 0) {
13680 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13681 }
13682 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13683 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13684 ctxt->wellFormed = 0;
13685 }
13686
13687 if (!ctxt->wellFormed) {
13688 if (ctxt->errNo == 0)
13689 ret = XML_ERR_INTERNAL_ERROR;
13690 else
13691 ret = (xmlParserErrors)ctxt->errNo;
13692 } else {
13693 ret = XML_ERR_OK;
13694 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013695
Daniel Veillard29b17482004-08-16 00:39:03 +000013696 /*
13697 * Return the newly created nodeset after unlinking it from
13698 * the pseudo sibling.
13699 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013700
Daniel Veillard29b17482004-08-16 00:39:03 +000013701 cur = fake->next;
13702 fake->next = NULL;
13703 node->last = fake;
13704
13705 if (cur != NULL) {
13706 cur->prev = NULL;
13707 }
13708
13709 *lst = cur;
13710
13711 while (cur != NULL) {
13712 cur->parent = NULL;
13713 cur = cur->next;
13714 }
13715
13716 xmlUnlinkNode(fake);
13717 xmlFreeNode(fake);
13718
13719
13720 if (ret != XML_ERR_OK) {
13721 xmlFreeNodeList(*lst);
13722 *lst = NULL;
13723 }
William M. Brackc3f81342004-10-03 01:22:44 +000013724
William M. Brackb7b54de2004-10-06 16:38:01 +000013725 if (doc->dict != NULL)
13726 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013727 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013728
Daniel Veillard29b17482004-08-16 00:39:03 +000013729 return(ret);
13730#else /* !SAX2 */
13731 return(XML_ERR_INTERNAL_ERROR);
13732#endif
13733}
13734
Daniel Veillard81273902003-09-30 00:43:48 +000013735#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013736/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013737 * xmlParseBalancedChunkMemoryRecover:
13738 * @doc: the document the chunk pertains to
13739 * @sax: the SAX handler bloc (possibly NULL)
13740 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13741 * @depth: Used for loop detection, use 0
13742 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13743 * @lst: the return value for the set of parsed nodes
13744 * @recover: return nodes even if the data is broken (use 0)
13745 *
13746 *
13747 * Parse a well-balanced chunk of an XML document
13748 * called by the parser
13749 * The allowed sequence for the Well Balanced Chunk is the one defined by
13750 * the content production in the XML grammar:
13751 *
13752 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13753 *
13754 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13755 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013756 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013757 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013758 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13759 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013760 */
13761int
13762xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013763 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013764 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013765 xmlParserCtxtPtr ctxt;
13766 xmlDocPtr newDoc;
13767 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013768 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013769 int size;
13770 int ret = 0;
13771
Daniel Veillard0161e632008-08-28 15:36:32 +000013772 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013773 return(XML_ERR_ENTITY_LOOP);
13774 }
13775
13776
Daniel Veillardcda96922001-08-21 10:56:31 +000013777 if (lst != NULL)
13778 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013779 if (string == NULL)
13780 return(-1);
13781
13782 size = xmlStrlen(string);
13783
13784 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13785 if (ctxt == NULL) return(-1);
13786 ctxt->userData = ctxt;
13787 if (sax != NULL) {
13788 oldsax = ctxt->sax;
13789 ctxt->sax = sax;
13790 if (user_data != NULL)
13791 ctxt->userData = user_data;
13792 }
13793 newDoc = xmlNewDoc(BAD_CAST "1.0");
13794 if (newDoc == NULL) {
13795 xmlFreeParserCtxt(ctxt);
13796 return(-1);
13797 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013798 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013799 if ((doc != NULL) && (doc->dict != NULL)) {
13800 xmlDictFree(ctxt->dict);
13801 ctxt->dict = doc->dict;
13802 xmlDictReference(ctxt->dict);
13803 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13804 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13805 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13806 ctxt->dictNames = 1;
13807 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013808 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013809 }
Owen Taylor3473f882001-02-23 17:55:21 +000013810 if (doc != NULL) {
13811 newDoc->intSubset = doc->intSubset;
13812 newDoc->extSubset = doc->extSubset;
13813 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013814 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13815 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013816 if (sax != NULL)
13817 ctxt->sax = oldsax;
13818 xmlFreeParserCtxt(ctxt);
13819 newDoc->intSubset = NULL;
13820 newDoc->extSubset = NULL;
13821 xmlFreeDoc(newDoc);
13822 return(-1);
13823 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013824 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13825 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013826 if (doc == NULL) {
13827 ctxt->myDoc = newDoc;
13828 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013829 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013830 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013831 /* Ensure that doc has XML spec namespace */
13832 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13833 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013834 }
13835 ctxt->instate = XML_PARSER_CONTENT;
13836 ctxt->depth = depth;
13837
13838 /*
13839 * Doing validity checking on chunk doesn't make sense
13840 */
13841 ctxt->validate = 0;
13842 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013843 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013844
Daniel Veillardb39bc392002-10-26 19:29:51 +000013845 if ( doc != NULL ){
13846 content = doc->children;
13847 doc->children = NULL;
13848 xmlParseContent(ctxt);
13849 doc->children = content;
13850 }
13851 else {
13852 xmlParseContent(ctxt);
13853 }
Owen Taylor3473f882001-02-23 17:55:21 +000013854 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013855 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013856 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013857 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013858 }
13859 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013860 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013861 }
13862
13863 if (!ctxt->wellFormed) {
13864 if (ctxt->errNo == 0)
13865 ret = 1;
13866 else
13867 ret = ctxt->errNo;
13868 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013869 ret = 0;
13870 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013871
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013872 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13873 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013874
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013875 /*
13876 * Return the newly created nodeset after unlinking it from
13877 * they pseudo parent.
13878 */
13879 cur = newDoc->children->children;
13880 *lst = cur;
13881 while (cur != NULL) {
13882 xmlSetTreeDoc(cur, doc);
13883 cur->parent = NULL;
13884 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013885 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013886 newDoc->children->children = NULL;
13887 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013888
13889 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013890 ctxt->sax = oldsax;
13891 xmlFreeParserCtxt(ctxt);
13892 newDoc->intSubset = NULL;
13893 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013894 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013895 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013896
Owen Taylor3473f882001-02-23 17:55:21 +000013897 return(ret);
13898}
13899
13900/**
13901 * xmlSAXParseEntity:
13902 * @sax: the SAX handler block
13903 * @filename: the filename
13904 *
13905 * parse an XML external entity out of context and build a tree.
13906 * It use the given SAX function block to handle the parsing callback.
13907 * If sax is NULL, fallback to the default DOM tree building routines.
13908 *
13909 * [78] extParsedEnt ::= TextDecl? content
13910 *
13911 * This correspond to a "Well Balanced" chunk
13912 *
13913 * Returns the resulting document tree
13914 */
13915
13916xmlDocPtr
13917xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13918 xmlDocPtr ret;
13919 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013920
13921 ctxt = xmlCreateFileParserCtxt(filename);
13922 if (ctxt == NULL) {
13923 return(NULL);
13924 }
13925 if (sax != NULL) {
13926 if (ctxt->sax != NULL)
13927 xmlFree(ctxt->sax);
13928 ctxt->sax = sax;
13929 ctxt->userData = NULL;
13930 }
13931
Owen Taylor3473f882001-02-23 17:55:21 +000013932 xmlParseExtParsedEnt(ctxt);
13933
13934 if (ctxt->wellFormed)
13935 ret = ctxt->myDoc;
13936 else {
13937 ret = NULL;
13938 xmlFreeDoc(ctxt->myDoc);
13939 ctxt->myDoc = NULL;
13940 }
13941 if (sax != NULL)
13942 ctxt->sax = NULL;
13943 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013944
Owen Taylor3473f882001-02-23 17:55:21 +000013945 return(ret);
13946}
13947
13948/**
13949 * xmlParseEntity:
13950 * @filename: the filename
13951 *
13952 * parse an XML external entity out of context and build a tree.
13953 *
13954 * [78] extParsedEnt ::= TextDecl? content
13955 *
13956 * This correspond to a "Well Balanced" chunk
13957 *
13958 * Returns the resulting document tree
13959 */
13960
13961xmlDocPtr
13962xmlParseEntity(const char *filename) {
13963 return(xmlSAXParseEntity(NULL, filename));
13964}
Daniel Veillard81273902003-09-30 00:43:48 +000013965#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013966
13967/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013968 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013969 * @URL: the entity URL
13970 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013971 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013972 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013973 *
13974 * Create a parser context for an external entity
13975 * Automatic support for ZLIB/Compress compressed document is provided
13976 * by default if found at compile-time.
13977 *
13978 * Returns the new parser context or NULL
13979 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013980static xmlParserCtxtPtr
13981xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13982 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013983 xmlParserCtxtPtr ctxt;
13984 xmlParserInputPtr inputStream;
13985 char *directory = NULL;
13986 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013987
Owen Taylor3473f882001-02-23 17:55:21 +000013988 ctxt = xmlNewParserCtxt();
13989 if (ctxt == NULL) {
13990 return(NULL);
13991 }
13992
Daniel Veillard48247b42009-07-10 16:12:46 +020013993 if (pctx != NULL) {
13994 ctxt->options = pctx->options;
13995 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013996 }
13997
Owen Taylor3473f882001-02-23 17:55:21 +000013998 uri = xmlBuildURI(URL, base);
13999
14000 if (uri == NULL) {
14001 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14002 if (inputStream == NULL) {
14003 xmlFreeParserCtxt(ctxt);
14004 return(NULL);
14005 }
14006
14007 inputPush(ctxt, inputStream);
14008
14009 if ((ctxt->directory == NULL) && (directory == NULL))
14010 directory = xmlParserGetDirectory((char *)URL);
14011 if ((ctxt->directory == NULL) && (directory != NULL))
14012 ctxt->directory = directory;
14013 } else {
14014 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14015 if (inputStream == NULL) {
14016 xmlFree(uri);
14017 xmlFreeParserCtxt(ctxt);
14018 return(NULL);
14019 }
14020
14021 inputPush(ctxt, inputStream);
14022
14023 if ((ctxt->directory == NULL) && (directory == NULL))
14024 directory = xmlParserGetDirectory((char *)uri);
14025 if ((ctxt->directory == NULL) && (directory != NULL))
14026 ctxt->directory = directory;
14027 xmlFree(uri);
14028 }
Owen Taylor3473f882001-02-23 17:55:21 +000014029 return(ctxt);
14030}
14031
Rob Richards9c0aa472009-03-26 18:10:19 +000014032/**
14033 * xmlCreateEntityParserCtxt:
14034 * @URL: the entity URL
14035 * @ID: the entity PUBLIC ID
14036 * @base: a possible base for the target URI
14037 *
14038 * Create a parser context for an external entity
14039 * Automatic support for ZLIB/Compress compressed document is provided
14040 * by default if found at compile-time.
14041 *
14042 * Returns the new parser context or NULL
14043 */
14044xmlParserCtxtPtr
14045xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14046 const xmlChar *base) {
14047 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14048
14049}
14050
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
14056
14057/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014058 * xmlCreateURLParserCtxt:
14059 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014060 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014061 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014062 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014063 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014064 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014065 *
14066 * Returns the new parser context or NULL
14067 */
14068xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014069xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014070{
14071 xmlParserCtxtPtr ctxt;
14072 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014073 char *directory = NULL;
14074
Owen Taylor3473f882001-02-23 17:55:21 +000014075 ctxt = xmlNewParserCtxt();
14076 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014077 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014078 return(NULL);
14079 }
14080
Daniel Veillarddf292f72005-01-16 19:00:15 +000014081 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014082 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014083 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014084
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014085 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014086 if (inputStream == NULL) {
14087 xmlFreeParserCtxt(ctxt);
14088 return(NULL);
14089 }
14090
Owen Taylor3473f882001-02-23 17:55:21 +000014091 inputPush(ctxt, inputStream);
14092 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014093 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014094 if ((ctxt->directory == NULL) && (directory != NULL))
14095 ctxt->directory = directory;
14096
14097 return(ctxt);
14098}
14099
Daniel Veillard61b93382003-11-03 14:28:31 +000014100/**
14101 * xmlCreateFileParserCtxt:
14102 * @filename: the filename
14103 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014104 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014105 * Automatic support for ZLIB/Compress compressed document is provided
14106 * by default if found at compile-time.
14107 *
14108 * Returns the new parser context or NULL
14109 */
14110xmlParserCtxtPtr
14111xmlCreateFileParserCtxt(const char *filename)
14112{
14113 return(xmlCreateURLParserCtxt(filename, 0));
14114}
14115
Daniel Veillard81273902003-09-30 00:43:48 +000014116#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014117/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014118 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014119 * @sax: the SAX handler block
14120 * @filename: the filename
14121 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14122 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014123 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014124 *
14125 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14126 * compressed document is provided by default if found at compile-time.
14127 * It use the given SAX function block to handle the parsing callback.
14128 * If sax is NULL, fallback to the default DOM tree building routines.
14129 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014130 * User data (void *) is stored within the parser context in the
14131 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014132 *
Owen Taylor3473f882001-02-23 17:55:21 +000014133 * Returns the resulting document tree
14134 */
14135
14136xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014137xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14138 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014139 xmlDocPtr ret;
14140 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014141
Daniel Veillard635ef722001-10-29 11:48:19 +000014142 xmlInitParser();
14143
Owen Taylor3473f882001-02-23 17:55:21 +000014144 ctxt = xmlCreateFileParserCtxt(filename);
14145 if (ctxt == NULL) {
14146 return(NULL);
14147 }
14148 if (sax != NULL) {
14149 if (ctxt->sax != NULL)
14150 xmlFree(ctxt->sax);
14151 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014152 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014153 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014154 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014155 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014156 }
Owen Taylor3473f882001-02-23 17:55:21 +000014157
Daniel Veillard37d2d162008-03-14 10:54:00 +000014158 if (ctxt->directory == NULL)
14159 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014160
Daniel Veillarddad3f682002-11-17 16:47:27 +000014161 ctxt->recovery = recovery;
14162
Owen Taylor3473f882001-02-23 17:55:21 +000014163 xmlParseDocument(ctxt);
14164
William M. Brackc07329e2003-09-08 01:57:30 +000014165 if ((ctxt->wellFormed) || recovery) {
14166 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014167 if (ret != NULL) {
14168 if (ctxt->input->buf->compressed > 0)
14169 ret->compression = 9;
14170 else
14171 ret->compression = ctxt->input->buf->compressed;
14172 }
William M. Brackc07329e2003-09-08 01:57:30 +000014173 }
Owen Taylor3473f882001-02-23 17:55:21 +000014174 else {
14175 ret = NULL;
14176 xmlFreeDoc(ctxt->myDoc);
14177 ctxt->myDoc = NULL;
14178 }
14179 if (sax != NULL)
14180 ctxt->sax = NULL;
14181 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014182
Owen Taylor3473f882001-02-23 17:55:21 +000014183 return(ret);
14184}
14185
14186/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014187 * xmlSAXParseFile:
14188 * @sax: the SAX handler block
14189 * @filename: the filename
14190 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14191 * documents
14192 *
14193 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14194 * compressed document is provided by default if found at compile-time.
14195 * It use the given SAX function block to handle the parsing callback.
14196 * If sax is NULL, fallback to the default DOM tree building routines.
14197 *
14198 * Returns the resulting document tree
14199 */
14200
14201xmlDocPtr
14202xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14203 int recovery) {
14204 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14205}
14206
14207/**
Owen Taylor3473f882001-02-23 17:55:21 +000014208 * xmlRecoverDoc:
14209 * @cur: a pointer to an array of xmlChar
14210 *
14211 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014212 * In the case the document is not Well Formed, a attempt to build a
14213 * tree is tried anyway
14214 *
14215 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014216 */
14217
14218xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014219xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014220 return(xmlSAXParseDoc(NULL, cur, 1));
14221}
14222
14223/**
14224 * xmlParseFile:
14225 * @filename: the filename
14226 *
14227 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14228 * compressed document is provided by default if found at compile-time.
14229 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014230 * Returns the resulting document tree if the file was wellformed,
14231 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014232 */
14233
14234xmlDocPtr
14235xmlParseFile(const char *filename) {
14236 return(xmlSAXParseFile(NULL, filename, 0));
14237}
14238
14239/**
14240 * xmlRecoverFile:
14241 * @filename: the filename
14242 *
14243 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14244 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014245 * In the case the document is not Well Formed, it attempts to build
14246 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014247 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014248 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014249 */
14250
14251xmlDocPtr
14252xmlRecoverFile(const char *filename) {
14253 return(xmlSAXParseFile(NULL, filename, 1));
14254}
14255
14256
14257/**
14258 * xmlSetupParserForBuffer:
14259 * @ctxt: an XML parser context
14260 * @buffer: a xmlChar * buffer
14261 * @filename: a file name
14262 *
14263 * Setup the parser context to parse a new buffer; Clears any prior
14264 * contents from the parser context. The buffer parameter must not be
14265 * NULL, but the filename parameter can be
14266 */
14267void
14268xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14269 const char* filename)
14270{
14271 xmlParserInputPtr input;
14272
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014273 if ((ctxt == NULL) || (buffer == NULL))
14274 return;
14275
Owen Taylor3473f882001-02-23 17:55:21 +000014276 input = xmlNewInputStream(ctxt);
14277 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014278 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014279 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014280 return;
14281 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014282
Owen Taylor3473f882001-02-23 17:55:21 +000014283 xmlClearParserCtxt(ctxt);
14284 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014285 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014286 input->base = buffer;
14287 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014288 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014289 inputPush(ctxt, input);
14290}
14291
14292/**
14293 * xmlSAXUserParseFile:
14294 * @sax: a SAX handler
14295 * @user_data: The user data returned on SAX callbacks
14296 * @filename: a file name
14297 *
14298 * parse an XML file and call the given SAX handler routines.
14299 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014300 *
Owen Taylor3473f882001-02-23 17:55:21 +000014301 * Returns 0 in case of success or a error number otherwise
14302 */
14303int
14304xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14305 const char *filename) {
14306 int ret = 0;
14307 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014308
Owen Taylor3473f882001-02-23 17:55:21 +000014309 ctxt = xmlCreateFileParserCtxt(filename);
14310 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014311 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014312 xmlFree(ctxt->sax);
14313 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014314 xmlDetectSAX2(ctxt);
14315
Owen Taylor3473f882001-02-23 17:55:21 +000014316 if (user_data != NULL)
14317 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014318
Owen Taylor3473f882001-02-23 17:55:21 +000014319 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014320
Owen Taylor3473f882001-02-23 17:55:21 +000014321 if (ctxt->wellFormed)
14322 ret = 0;
14323 else {
14324 if (ctxt->errNo != 0)
14325 ret = ctxt->errNo;
14326 else
14327 ret = -1;
14328 }
14329 if (sax != NULL)
14330 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014331 if (ctxt->myDoc != NULL) {
14332 xmlFreeDoc(ctxt->myDoc);
14333 ctxt->myDoc = NULL;
14334 }
Owen Taylor3473f882001-02-23 17:55:21 +000014335 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014336
Owen Taylor3473f882001-02-23 17:55:21 +000014337 return ret;
14338}
Daniel Veillard81273902003-09-30 00:43:48 +000014339#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014340
14341/************************************************************************
14342 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014343 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014344 * *
14345 ************************************************************************/
14346
14347/**
14348 * xmlCreateMemoryParserCtxt:
14349 * @buffer: a pointer to a char array
14350 * @size: the size of the array
14351 *
14352 * Create a parser context for an XML in-memory document.
14353 *
14354 * Returns the new parser context or NULL
14355 */
14356xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014357xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014358 xmlParserCtxtPtr ctxt;
14359 xmlParserInputPtr input;
14360 xmlParserInputBufferPtr buf;
14361
14362 if (buffer == NULL)
14363 return(NULL);
14364 if (size <= 0)
14365 return(NULL);
14366
14367 ctxt = xmlNewParserCtxt();
14368 if (ctxt == NULL)
14369 return(NULL);
14370
Daniel Veillard53350552003-09-18 13:35:51 +000014371 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014372 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014373 if (buf == NULL) {
14374 xmlFreeParserCtxt(ctxt);
14375 return(NULL);
14376 }
Owen Taylor3473f882001-02-23 17:55:21 +000014377
14378 input = xmlNewInputStream(ctxt);
14379 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014380 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014381 xmlFreeParserCtxt(ctxt);
14382 return(NULL);
14383 }
14384
14385 input->filename = NULL;
14386 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014387 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014388
14389 inputPush(ctxt, input);
14390 return(ctxt);
14391}
14392
Daniel Veillard81273902003-09-30 00:43:48 +000014393#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014394/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014395 * xmlSAXParseMemoryWithData:
14396 * @sax: the SAX handler block
14397 * @buffer: an pointer to a char array
14398 * @size: the size of the array
14399 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14400 * documents
14401 * @data: the userdata
14402 *
14403 * parse an XML in-memory block and use the given SAX function block
14404 * to handle the parsing callback. If sax is NULL, fallback to the default
14405 * DOM tree building routines.
14406 *
14407 * User data (void *) is stored within the parser context in the
14408 * context's _private member, so it is available nearly everywhere in libxml
14409 *
14410 * Returns the resulting document tree
14411 */
14412
14413xmlDocPtr
14414xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14415 int size, int recovery, void *data) {
14416 xmlDocPtr ret;
14417 xmlParserCtxtPtr ctxt;
14418
Daniel Veillardab2a7632009-07-09 08:45:03 +020014419 xmlInitParser();
14420
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014421 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14422 if (ctxt == NULL) return(NULL);
14423 if (sax != NULL) {
14424 if (ctxt->sax != NULL)
14425 xmlFree(ctxt->sax);
14426 ctxt->sax = sax;
14427 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014428 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014429 if (data!=NULL) {
14430 ctxt->_private=data;
14431 }
14432
Daniel Veillardadba5f12003-04-04 16:09:01 +000014433 ctxt->recovery = recovery;
14434
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014435 xmlParseDocument(ctxt);
14436
14437 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14438 else {
14439 ret = NULL;
14440 xmlFreeDoc(ctxt->myDoc);
14441 ctxt->myDoc = NULL;
14442 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014443 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014444 ctxt->sax = NULL;
14445 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014446
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014447 return(ret);
14448}
14449
14450/**
Owen Taylor3473f882001-02-23 17:55:21 +000014451 * xmlSAXParseMemory:
14452 * @sax: the SAX handler block
14453 * @buffer: an pointer to a char array
14454 * @size: the size of the array
14455 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14456 * documents
14457 *
14458 * parse an XML in-memory block and use the given SAX function block
14459 * to handle the parsing callback. If sax is NULL, fallback to the default
14460 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014461 *
Owen Taylor3473f882001-02-23 17:55:21 +000014462 * Returns the resulting document tree
14463 */
14464xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014465xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14466 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014467 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014468}
14469
14470/**
14471 * xmlParseMemory:
14472 * @buffer: an pointer to a char array
14473 * @size: the size of the array
14474 *
14475 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014476 *
Owen Taylor3473f882001-02-23 17:55:21 +000014477 * Returns the resulting document tree
14478 */
14479
Daniel Veillard50822cb2001-07-26 20:05:51 +000014480xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014481 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14482}
14483
14484/**
14485 * xmlRecoverMemory:
14486 * @buffer: an pointer to a char array
14487 * @size: the size of the array
14488 *
14489 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014490 * In the case the document is not Well Formed, an attempt to
14491 * build a tree is tried anyway
14492 *
14493 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014494 */
14495
Daniel Veillard50822cb2001-07-26 20:05:51 +000014496xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014497 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14498}
14499
14500/**
14501 * xmlSAXUserParseMemory:
14502 * @sax: a SAX handler
14503 * @user_data: The user data returned on SAX callbacks
14504 * @buffer: an in-memory XML document input
14505 * @size: the length of the XML document in bytes
14506 *
14507 * A better SAX parsing routine.
14508 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014509 *
Owen Taylor3473f882001-02-23 17:55:21 +000014510 * Returns 0 in case of success or a error number otherwise
14511 */
14512int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014513 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014514 int ret = 0;
14515 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014516
14517 xmlInitParser();
14518
Owen Taylor3473f882001-02-23 17:55:21 +000014519 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14520 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014521 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14522 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014523 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014524 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014525
Daniel Veillard30211a02001-04-26 09:33:18 +000014526 if (user_data != NULL)
14527 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014528
Owen Taylor3473f882001-02-23 17:55:21 +000014529 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014530
Owen Taylor3473f882001-02-23 17:55:21 +000014531 if (ctxt->wellFormed)
14532 ret = 0;
14533 else {
14534 if (ctxt->errNo != 0)
14535 ret = ctxt->errNo;
14536 else
14537 ret = -1;
14538 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014539 if (sax != NULL)
14540 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014541 if (ctxt->myDoc != NULL) {
14542 xmlFreeDoc(ctxt->myDoc);
14543 ctxt->myDoc = NULL;
14544 }
Owen Taylor3473f882001-02-23 17:55:21 +000014545 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014546
Owen Taylor3473f882001-02-23 17:55:21 +000014547 return ret;
14548}
Daniel Veillard81273902003-09-30 00:43:48 +000014549#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014550
14551/**
14552 * xmlCreateDocParserCtxt:
14553 * @cur: a pointer to an array of xmlChar
14554 *
14555 * Creates a parser context for an XML in-memory document.
14556 *
14557 * Returns the new parser context or NULL
14558 */
14559xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014560xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014561 int len;
14562
14563 if (cur == NULL)
14564 return(NULL);
14565 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014566 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014567}
14568
Daniel Veillard81273902003-09-30 00:43:48 +000014569#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014570/**
14571 * xmlSAXParseDoc:
14572 * @sax: the SAX handler block
14573 * @cur: a pointer to an array of xmlChar
14574 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14575 * documents
14576 *
14577 * parse an XML in-memory document and build a tree.
14578 * It use the given SAX function block to handle the parsing callback.
14579 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014580 *
Owen Taylor3473f882001-02-23 17:55:21 +000014581 * Returns the resulting document tree
14582 */
14583
14584xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014585xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014586 xmlDocPtr ret;
14587 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014588 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014589
Daniel Veillard38936062004-11-04 17:45:11 +000014590 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014591
14592
14593 ctxt = xmlCreateDocParserCtxt(cur);
14594 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014595 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014596 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014597 ctxt->sax = sax;
14598 ctxt->userData = NULL;
14599 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014600 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014601
14602 xmlParseDocument(ctxt);
14603 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14604 else {
14605 ret = NULL;
14606 xmlFreeDoc(ctxt->myDoc);
14607 ctxt->myDoc = NULL;
14608 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014609 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014610 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014611 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014612
Owen Taylor3473f882001-02-23 17:55:21 +000014613 return(ret);
14614}
14615
14616/**
14617 * xmlParseDoc:
14618 * @cur: a pointer to an array of xmlChar
14619 *
14620 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014621 *
Owen Taylor3473f882001-02-23 17:55:21 +000014622 * Returns the resulting document tree
14623 */
14624
14625xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014626xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014627 return(xmlSAXParseDoc(NULL, cur, 0));
14628}
Daniel Veillard81273902003-09-30 00:43:48 +000014629#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014630
Daniel Veillard81273902003-09-30 00:43:48 +000014631#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014632/************************************************************************
14633 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014634 * Specific function to keep track of entities references *
14635 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014636 * *
14637 ************************************************************************/
14638
14639static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14640
14641/**
14642 * xmlAddEntityReference:
14643 * @ent : A valid entity
14644 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014645 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014646 *
14647 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14648 */
14649static void
14650xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14651 xmlNodePtr lastNode)
14652{
14653 if (xmlEntityRefFunc != NULL) {
14654 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14655 }
14656}
14657
14658
14659/**
14660 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014661 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014662 *
14663 * Set the function to call call back when a xml reference has been made
14664 */
14665void
14666xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14667{
14668 xmlEntityRefFunc = func;
14669}
Daniel Veillard81273902003-09-30 00:43:48 +000014670#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014671
14672/************************************************************************
14673 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014674 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014675 * *
14676 ************************************************************************/
14677
14678#ifdef LIBXML_XPATH_ENABLED
14679#include <libxml/xpath.h>
14680#endif
14681
Daniel Veillardffa3c742005-07-21 13:24:09 +000014682extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014683static int xmlParserInitialized = 0;
14684
14685/**
14686 * xmlInitParser:
14687 *
14688 * Initialization function for the XML parser.
14689 * This is not reentrant. Call once before processing in case of
14690 * use in multithreaded programs.
14691 */
14692
14693void
14694xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014695 if (xmlParserInitialized != 0)
14696 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014697
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014698#ifdef LIBXML_THREAD_ENABLED
14699 __xmlGlobalInitMutexLock();
14700 if (xmlParserInitialized == 0) {
14701#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014702 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014703 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014704 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14705 (xmlGenericError == NULL))
14706 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014707 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014708 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014709 xmlInitCharEncodingHandlers();
14710 xmlDefaultSAXHandlerInit();
14711 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014712#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014713 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014714#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014715#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014716 htmlInitAutoClose();
14717 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014718#endif
14719#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014720 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014721#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014722 xmlParserInitialized = 1;
14723#ifdef LIBXML_THREAD_ENABLED
14724 }
14725 __xmlGlobalInitMutexUnlock();
14726#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014727}
14728
14729/**
14730 * xmlCleanupParser:
14731 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014732 * This function name is somewhat misleading. It does not clean up
14733 * parser state, it cleans up memory allocated by the library itself.
14734 * It is a cleanup function for the XML library. It tries to reclaim all
14735 * related global memory allocated for the library processing.
14736 * It doesn't deallocate any document related memory. One should
14737 * call xmlCleanupParser() only when the process has finished using
14738 * the library and all XML/HTML documents built with it.
14739 * See also xmlInitParser() which has the opposite function of preparing
14740 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014741 *
14742 * WARNING: if your application is multithreaded or has plugin support
14743 * calling this may crash the application if another thread or
14744 * a plugin is still using libxml2. It's sometimes very hard to
14745 * guess if libxml2 is in use in the application, some libraries
14746 * or plugins may use it without notice. In case of doubt abstain
14747 * from calling this function or do it just before calling exit()
14748 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014749 */
14750
14751void
14752xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014753 if (!xmlParserInitialized)
14754 return;
14755
Owen Taylor3473f882001-02-23 17:55:21 +000014756 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014757#ifdef LIBXML_CATALOG_ENABLED
14758 xmlCatalogCleanup();
14759#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014760 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014761 xmlCleanupInputCallbacks();
14762#ifdef LIBXML_OUTPUT_ENABLED
14763 xmlCleanupOutputCallbacks();
14764#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014765#ifdef LIBXML_SCHEMAS_ENABLED
14766 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014767 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014768#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014769 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014770 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014771 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014772 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014773 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014774}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014775
14776/************************************************************************
14777 * *
14778 * New set (2.6.0) of simpler and more flexible APIs *
14779 * *
14780 ************************************************************************/
14781
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string with xmlFree() unless it is owned by the "dict"
 * dictionary of the current scope. A NULL string is ignored, and a
 * string is always freed when "dict" itself is NULL.
 *
 * NOTE: the expansion is a bare "if" statement that already ends with
 * a semicolon, so it should only be used as a statement of its own.
 */
#define DICT_FREE(str)							\
	if (((str) != NULL) &&						\
	    ((dict == NULL) ||						\
	     (!xmlDictOwns(dict, (const xmlChar *)(str)))))		\
	    xmlFree((char *)(str));
14793
14794/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014795 * xmlCtxtReset:
14796 * @ctxt: an XML parser context
14797 *
14798 * Reset a parser context
14799 */
14800void
14801xmlCtxtReset(xmlParserCtxtPtr ctxt)
14802{
14803 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014804 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014805
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014806 if (ctxt == NULL)
14807 return;
14808
14809 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014810
14811 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14812 xmlFreeInputStream(input);
14813 }
14814 ctxt->inputNr = 0;
14815 ctxt->input = NULL;
14816
14817 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014818 if (ctxt->spaceTab != NULL) {
14819 ctxt->spaceTab[0] = -1;
14820 ctxt->space = &ctxt->spaceTab[0];
14821 } else {
14822 ctxt->space = NULL;
14823 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014824
14825
14826 ctxt->nodeNr = 0;
14827 ctxt->node = NULL;
14828
14829 ctxt->nameNr = 0;
14830 ctxt->name = NULL;
14831
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014832 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014833 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014834 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014835 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014836 DICT_FREE(ctxt->directory);
14837 ctxt->directory = NULL;
14838 DICT_FREE(ctxt->extSubURI);
14839 ctxt->extSubURI = NULL;
14840 DICT_FREE(ctxt->extSubSystem);
14841 ctxt->extSubSystem = NULL;
14842 if (ctxt->myDoc != NULL)
14843 xmlFreeDoc(ctxt->myDoc);
14844 ctxt->myDoc = NULL;
14845
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014846 ctxt->standalone = -1;
14847 ctxt->hasExternalSubset = 0;
14848 ctxt->hasPErefs = 0;
14849 ctxt->html = 0;
14850 ctxt->external = 0;
14851 ctxt->instate = XML_PARSER_START;
14852 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014853
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014854 ctxt->wellFormed = 1;
14855 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014856 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014857 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014858#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014859 ctxt->vctxt.userData = ctxt;
14860 ctxt->vctxt.error = xmlParserValidityError;
14861 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014862#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014863 ctxt->record_info = 0;
14864 ctxt->nbChars = 0;
14865 ctxt->checkIndex = 0;
14866 ctxt->inSubset = 0;
14867 ctxt->errNo = XML_ERR_OK;
14868 ctxt->depth = 0;
14869 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14870 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014871 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014872 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014873 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014874 xmlInitNodeInfoSeq(&ctxt->node_seq);
14875
14876 if (ctxt->attsDefault != NULL) {
14877 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14878 ctxt->attsDefault = NULL;
14879 }
14880 if (ctxt->attsSpecial != NULL) {
14881 xmlHashFree(ctxt->attsSpecial, NULL);
14882 ctxt->attsSpecial = NULL;
14883 }
14884
Daniel Veillard4432df22003-09-28 18:58:27 +000014885#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014886 if (ctxt->catalogs != NULL)
14887 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014888#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014889 if (ctxt->lastError.code != XML_ERR_OK)
14890 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014891}
14892
14893/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014894 * xmlCtxtResetPush:
14895 * @ctxt: an XML parser context
14896 * @chunk: a pointer to an array of chars
14897 * @size: number of chars in the array
14898 * @filename: an optional file name or URI
14899 * @encoding: the document encoding, or NULL
14900 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014901 * Reset a push parser context
14902 *
14903 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014904 */
14905int
14906xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14907 int size, const char *filename, const char *encoding)
14908{
14909 xmlParserInputPtr inputStream;
14910 xmlParserInputBufferPtr buf;
14911 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14912
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014913 if (ctxt == NULL)
14914 return(1);
14915
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014916 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14917 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14918
14919 buf = xmlAllocParserInputBuffer(enc);
14920 if (buf == NULL)
14921 return(1);
14922
14923 if (ctxt == NULL) {
14924 xmlFreeParserInputBuffer(buf);
14925 return(1);
14926 }
14927
14928 xmlCtxtReset(ctxt);
14929
14930 if (ctxt->pushTab == NULL) {
14931 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14932 sizeof(xmlChar *));
14933 if (ctxt->pushTab == NULL) {
14934 xmlErrMemory(ctxt, NULL);
14935 xmlFreeParserInputBuffer(buf);
14936 return(1);
14937 }
14938 }
14939
14940 if (filename == NULL) {
14941 ctxt->directory = NULL;
14942 } else {
14943 ctxt->directory = xmlParserGetDirectory(filename);
14944 }
14945
14946 inputStream = xmlNewInputStream(ctxt);
14947 if (inputStream == NULL) {
14948 xmlFreeParserInputBuffer(buf);
14949 return(1);
14950 }
14951
14952 if (filename == NULL)
14953 inputStream->filename = NULL;
14954 else
14955 inputStream->filename = (char *)
14956 xmlCanonicPath((const xmlChar *) filename);
14957 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014958 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014959
14960 inputPush(ctxt, inputStream);
14961
14962 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14963 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014964 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14965 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014966
14967 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14968
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014969 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014970#ifdef DEBUG_PUSH
14971 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14972#endif
14973 }
14974
14975 if (encoding != NULL) {
14976 xmlCharEncodingHandlerPtr hdlr;
14977
Daniel Veillard37334572008-07-31 08:20:02 +000014978 if (ctxt->encoding != NULL)
14979 xmlFree((xmlChar *) ctxt->encoding);
14980 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14981
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014982 hdlr = xmlFindCharEncodingHandler(encoding);
14983 if (hdlr != NULL) {
14984 xmlSwitchToEncoding(ctxt, hdlr);
14985 } else {
14986 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14987 "Unsupported encoding %s\n", BAD_CAST encoding);
14988 }
14989 } else if (enc != XML_CHAR_ENCODING_NONE) {
14990 xmlSwitchEncoding(ctxt, enc);
14991 }
14992
14993 return(0);
14994}
14995
Daniel Veillard37334572008-07-31 08:20:02 +000014996
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014997/**
Daniel Veillard37334572008-07-31 08:20:02 +000014998 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014999 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015000 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015001 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015002 *
15003 * Applies the options to the parser context
15004 *
15005 * Returns 0 in case of success, the set of unknown or unimplemented options
15006 * in case of error.
15007 */
Daniel Veillard37334572008-07-31 08:20:02 +000015008static int
15009xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015010{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015011 if (ctxt == NULL)
15012 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015013 if (encoding != NULL) {
15014 if (ctxt->encoding != NULL)
15015 xmlFree((xmlChar *) ctxt->encoding);
15016 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15017 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015018 if (options & XML_PARSE_RECOVER) {
15019 ctxt->recovery = 1;
15020 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015021 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015022 } else
15023 ctxt->recovery = 0;
15024 if (options & XML_PARSE_DTDLOAD) {
15025 ctxt->loadsubset = XML_DETECT_IDS;
15026 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015027 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015028 } else
15029 ctxt->loadsubset = 0;
15030 if (options & XML_PARSE_DTDATTR) {
15031 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15032 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015033 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015034 }
15035 if (options & XML_PARSE_NOENT) {
15036 ctxt->replaceEntities = 1;
15037 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15038 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015039 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015040 } else
15041 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015042 if (options & XML_PARSE_PEDANTIC) {
15043 ctxt->pedantic = 1;
15044 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015045 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015046 } else
15047 ctxt->pedantic = 0;
15048 if (options & XML_PARSE_NOBLANKS) {
15049 ctxt->keepBlanks = 0;
15050 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15051 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015052 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015053 } else
15054 ctxt->keepBlanks = 1;
15055 if (options & XML_PARSE_DTDVALID) {
15056 ctxt->validate = 1;
15057 if (options & XML_PARSE_NOWARNING)
15058 ctxt->vctxt.warning = NULL;
15059 if (options & XML_PARSE_NOERROR)
15060 ctxt->vctxt.error = NULL;
15061 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015062 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015063 } else
15064 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015065 if (options & XML_PARSE_NOWARNING) {
15066 ctxt->sax->warning = NULL;
15067 options -= XML_PARSE_NOWARNING;
15068 }
15069 if (options & XML_PARSE_NOERROR) {
15070 ctxt->sax->error = NULL;
15071 ctxt->sax->fatalError = NULL;
15072 options -= XML_PARSE_NOERROR;
15073 }
Daniel Veillard81273902003-09-30 00:43:48 +000015074#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015075 if (options & XML_PARSE_SAX1) {
15076 ctxt->sax->startElement = xmlSAX2StartElement;
15077 ctxt->sax->endElement = xmlSAX2EndElement;
15078 ctxt->sax->startElementNs = NULL;
15079 ctxt->sax->endElementNs = NULL;
15080 ctxt->sax->initialized = 1;
15081 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015082 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015083 }
Daniel Veillard81273902003-09-30 00:43:48 +000015084#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015085 if (options & XML_PARSE_NODICT) {
15086 ctxt->dictNames = 0;
15087 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015088 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015089 } else {
15090 ctxt->dictNames = 1;
15091 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015092 if (options & XML_PARSE_NOCDATA) {
15093 ctxt->sax->cdataBlock = NULL;
15094 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015095 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015096 }
15097 if (options & XML_PARSE_NSCLEAN) {
15098 ctxt->options |= XML_PARSE_NSCLEAN;
15099 options -= XML_PARSE_NSCLEAN;
15100 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015101 if (options & XML_PARSE_NONET) {
15102 ctxt->options |= XML_PARSE_NONET;
15103 options -= XML_PARSE_NONET;
15104 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015105 if (options & XML_PARSE_COMPACT) {
15106 ctxt->options |= XML_PARSE_COMPACT;
15107 options -= XML_PARSE_COMPACT;
15108 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015109 if (options & XML_PARSE_OLD10) {
15110 ctxt->options |= XML_PARSE_OLD10;
15111 options -= XML_PARSE_OLD10;
15112 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015113 if (options & XML_PARSE_NOBASEFIX) {
15114 ctxt->options |= XML_PARSE_NOBASEFIX;
15115 options -= XML_PARSE_NOBASEFIX;
15116 }
15117 if (options & XML_PARSE_HUGE) {
15118 ctxt->options |= XML_PARSE_HUGE;
15119 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015120 if (ctxt->dict != NULL)
15121 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015122 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015123 if (options & XML_PARSE_OLDSAX) {
15124 ctxt->options |= XML_PARSE_OLDSAX;
15125 options -= XML_PARSE_OLDSAX;
15126 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015127 if (options & XML_PARSE_IGNORE_ENC) {
15128 ctxt->options |= XML_PARSE_IGNORE_ENC;
15129 options -= XML_PARSE_IGNORE_ENC;
15130 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015131 if (options & XML_PARSE_BIG_LINES) {
15132 ctxt->options |= XML_PARSE_BIG_LINES;
15133 options -= XML_PARSE_BIG_LINES;
15134 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015135 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015136 return (options);
15137}
15138
15139/**
Daniel Veillard37334572008-07-31 08:20:02 +000015140 * xmlCtxtUseOptions:
15141 * @ctxt: an XML parser context
15142 * @options: a combination of xmlParserOption
15143 *
15144 * Applies the options to the parser context
15145 *
15146 * Returns 0 in case of success, the set of unknown or unimplemented options
15147 * in case of error.
15148 */
15149int
15150xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15151{
15152 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15153}
15154
15155/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015156 * xmlDoRead:
15157 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015158 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015159 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015160 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015161 * @reuse: keep the context for reuse
15162 *
15163 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015164 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015165 * Returns the resulting document tree or NULL
15166 */
15167static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015168xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15169 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015170{
15171 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015172
15173 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015174 if (encoding != NULL) {
15175 xmlCharEncodingHandlerPtr hdlr;
15176
15177 hdlr = xmlFindCharEncodingHandler(encoding);
15178 if (hdlr != NULL)
15179 xmlSwitchToEncoding(ctxt, hdlr);
15180 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015181 if ((URL != NULL) && (ctxt->input != NULL) &&
15182 (ctxt->input->filename == NULL))
15183 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015184 xmlParseDocument(ctxt);
15185 if ((ctxt->wellFormed) || ctxt->recovery)
15186 ret = ctxt->myDoc;
15187 else {
15188 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015189 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015190 xmlFreeDoc(ctxt->myDoc);
15191 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015192 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015193 ctxt->myDoc = NULL;
15194 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015195 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015196 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015197
15198 return (ret);
15199}
15200
15201/**
15202 * xmlReadDoc:
15203 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015204 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015205 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015206 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015207 *
15208 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015209 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015210 * Returns the resulting document tree
15211 */
15212xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015213xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015214{
15215 xmlParserCtxtPtr ctxt;
15216
15217 if (cur == NULL)
15218 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015219 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015220
15221 ctxt = xmlCreateDocParserCtxt(cur);
15222 if (ctxt == NULL)
15223 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015224 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015225}
15226
15227/**
15228 * xmlReadFile:
15229 * @filename: a file or URL
15230 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015231 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015232 *
15233 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015234 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015235 * Returns the resulting document tree
15236 */
15237xmlDocPtr
15238xmlReadFile(const char *filename, const char *encoding, int options)
15239{
15240 xmlParserCtxtPtr ctxt;
15241
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015242 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015243 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015244 if (ctxt == NULL)
15245 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015246 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247}
15248
15249/**
15250 * xmlReadMemory:
15251 * @buffer: a pointer to a char array
15252 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015253 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015254 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015255 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015256 *
15257 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015258 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015259 * Returns the resulting document tree
15260 */
15261xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015262xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015263{
15264 xmlParserCtxtPtr ctxt;
15265
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015266 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015267 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15268 if (ctxt == NULL)
15269 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015270 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015271}
15272
15273/**
15274 * xmlReadFd:
15275 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015276 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015277 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015278 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015279 *
15280 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015281 * NOTE that the file descriptor will not be closed when the
15282 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015283 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015284 * Returns the resulting document tree
15285 */
15286xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015287xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015288{
15289 xmlParserCtxtPtr ctxt;
15290 xmlParserInputBufferPtr input;
15291 xmlParserInputPtr stream;
15292
15293 if (fd < 0)
15294 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015295 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015296
15297 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15298 if (input == NULL)
15299 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015300 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015301 ctxt = xmlNewParserCtxt();
15302 if (ctxt == NULL) {
15303 xmlFreeParserInputBuffer(input);
15304 return (NULL);
15305 }
15306 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15307 if (stream == NULL) {
15308 xmlFreeParserInputBuffer(input);
15309 xmlFreeParserCtxt(ctxt);
15310 return (NULL);
15311 }
15312 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015313 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015314}
15315
15316/**
15317 * xmlReadIO:
15318 * @ioread: an I/O read function
15319 * @ioclose: an I/O close function
15320 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015321 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015322 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015323 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015324 *
15325 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015326 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015327 * Returns the resulting document tree
15328 */
15329xmlDocPtr
15330xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015331 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015332{
15333 xmlParserCtxtPtr ctxt;
15334 xmlParserInputBufferPtr input;
15335 xmlParserInputPtr stream;
15336
15337 if (ioread == NULL)
15338 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015339 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015340
15341 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15342 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015343 if (input == NULL) {
15344 if (ioclose != NULL)
15345 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015346 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015347 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015348 ctxt = xmlNewParserCtxt();
15349 if (ctxt == NULL) {
15350 xmlFreeParserInputBuffer(input);
15351 return (NULL);
15352 }
15353 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15354 if (stream == NULL) {
15355 xmlFreeParserInputBuffer(input);
15356 xmlFreeParserCtxt(ctxt);
15357 return (NULL);
15358 }
15359 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015360 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015361}
15362
15363/**
15364 * xmlCtxtReadDoc:
15365 * @ctxt: an XML parser context
15366 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015367 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015368 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015369 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015370 *
15371 * parse an XML in-memory document and build a tree.
15372 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015373 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015374 * Returns the resulting document tree
15375 */
15376xmlDocPtr
15377xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015378 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015379{
15380 xmlParserInputPtr stream;
15381
15382 if (cur == NULL)
15383 return (NULL);
15384 if (ctxt == NULL)
15385 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015386 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015387
15388 xmlCtxtReset(ctxt);
15389
15390 stream = xmlNewStringInputStream(ctxt, cur);
15391 if (stream == NULL) {
15392 return (NULL);
15393 }
15394 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015395 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015396}
15397
15398/**
15399 * xmlCtxtReadFile:
15400 * @ctxt: an XML parser context
15401 * @filename: a file or URL
15402 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015403 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015404 *
15405 * parse an XML file from the filesystem or the network.
15406 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015407 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408 * Returns the resulting document tree
15409 */
15410xmlDocPtr
15411xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15412 const char *encoding, int options)
15413{
15414 xmlParserInputPtr stream;
15415
15416 if (filename == NULL)
15417 return (NULL);
15418 if (ctxt == NULL)
15419 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015420 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015421
15422 xmlCtxtReset(ctxt);
15423
Daniel Veillard29614c72004-11-26 10:47:26 +000015424 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015425 if (stream == NULL) {
15426 return (NULL);
15427 }
15428 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015429 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015430}
15431
15432/**
15433 * xmlCtxtReadMemory:
15434 * @ctxt: an XML parser context
15435 * @buffer: a pointer to a char array
15436 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015437 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015438 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015439 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015440 *
15441 * parse an XML in-memory document and build a tree.
15442 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015443 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015444 * Returns the resulting document tree
15445 */
15446xmlDocPtr
15447xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015448 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015449{
15450 xmlParserInputBufferPtr input;
15451 xmlParserInputPtr stream;
15452
15453 if (ctxt == NULL)
15454 return (NULL);
15455 if (buffer == NULL)
15456 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015457 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015458
15459 xmlCtxtReset(ctxt);
15460
15461 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15462 if (input == NULL) {
15463 return(NULL);
15464 }
15465
15466 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15467 if (stream == NULL) {
15468 xmlFreeParserInputBuffer(input);
15469 return(NULL);
15470 }
15471
15472 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015473 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015474}
15475
15476/**
15477 * xmlCtxtReadFd:
15478 * @ctxt: an XML parser context
15479 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015480 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015481 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015482 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015483 *
15484 * parse an XML from a file descriptor and build a tree.
15485 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015486 * NOTE that the file descriptor will not be closed when the
15487 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015488 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015489 * Returns the resulting document tree
15490 */
15491xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015492xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15493 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015494{
15495 xmlParserInputBufferPtr input;
15496 xmlParserInputPtr stream;
15497
15498 if (fd < 0)
15499 return (NULL);
15500 if (ctxt == NULL)
15501 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015502 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015503
15504 xmlCtxtReset(ctxt);
15505
15506
15507 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15508 if (input == NULL)
15509 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015510 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015511 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15512 if (stream == NULL) {
15513 xmlFreeParserInputBuffer(input);
15514 return (NULL);
15515 }
15516 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015517 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015518}
15519
15520/**
15521 * xmlCtxtReadIO:
15522 * @ctxt: an XML parser context
15523 * @ioread: an I/O read function
15524 * @ioclose: an I/O close function
15525 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015526 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015527 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015528 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015529 *
15530 * parse an XML document from I/O functions and source and build a tree.
15531 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015532 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015533 * Returns the resulting document tree
15534 */
15535xmlDocPtr
15536xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15537 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015538 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015539 const char *encoding, int options)
15540{
15541 xmlParserInputBufferPtr input;
15542 xmlParserInputPtr stream;
15543
15544 if (ioread == NULL)
15545 return (NULL);
15546 if (ctxt == NULL)
15547 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015548 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015549
15550 xmlCtxtReset(ctxt);
15551
15552 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15553 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015554 if (input == NULL) {
15555 if (ioclose != NULL)
15556 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015557 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015558 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015559 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15560 if (stream == NULL) {
15561 xmlFreeParserInputBuffer(input);
15562 return (NULL);
15563 }
15564 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015565 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015566}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015567
15568#define bottom_parser
15569#include "elfgcchack.h"