blob: 63ae15f0f6b08489614ba6bb35c33f554e5d7ec5 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080097static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
Daniel Veillard0161e632008-08-28 15:36:32 +000099/************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
105#define XML_PARSER_BIG_ENTITY 1000
106#define XML_PARSER_LOT_ENTITY 5000
107
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
114#define XML_PARSER_NON_LINEAR 10
115
116/*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800127 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000128{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800129 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
Peter Simons8f30bdf2016-04-15 11:56:55 +0200148 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200151 --ctxt->depth;
Daniel Veillardbdd66182016-05-23 12:27:58 +0800152 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
153 ent->content[0] = 0;
154 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800204 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800223 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000224 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000229 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232}
233
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000234/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000235 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000236 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000237 * arbitrary depth limit for the XML documents that we allow to
238 * process. This is not a limitation of the parser but a safety
239 * boundary feature. It can be disabled with the XML_PARSE_HUGE
240 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000241 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000242unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000243
Daniel Veillard0fb18932003-09-07 09:14:37 +0000244
Daniel Veillard0161e632008-08-28 15:36:32 +0000245
246#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000247#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000248#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000249#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
250
Daniel Veillard1f972e92012-08-15 10:16:37 +0800251/**
252 * XML_PARSER_CHUNK_SIZE
253 *
254 * When calling GROW that's the minimal amount of data
255 * the parser expected to have received. It is not a hard
256 * limit but an optimization when reading strings like Names
257 * It is not strictly needed as long as inputs available characters
258 * are followed by 0, which should be provided by the I/O level
259 */
260#define XML_PARSER_CHUNK_SIZE 100
261
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * table slots are const: nothing is expected to modify this list.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
271
Daniel Veillarda07050d2003-10-19 14:46:32 +0000272
Owen Taylor3473f882001-02-23 17:55:21 +0000273/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200274static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000276
Daniel Veillard7d515752003-09-26 19:12:37 +0000277static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000278xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000280 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000281 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard37334572008-07-31 08:20:02 +0000283static int
284xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000286#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000287static void
288xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000290#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000291
Daniel Veillard7d515752003-09-26 19:12:37 +0000292static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000293xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000295
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000296static int
297xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
Daniel Veillarde57ec792003-09-10 10:50:59 +0000299/************************************************************************
300 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800301 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 * *
303 ************************************************************************/
304
305/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313static void
314xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316{
Daniel Veillard157fee02003-10-31 10:36:03 +0000317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200322
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000323 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000328 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339}
340
341/**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351{
352 const char *errmsg;
353
Daniel Veillard157fee02003-10-31 10:36:03 +0000354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "internal error";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "Fragment not allowed";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800443 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800462 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000464 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800465 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800502 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800505 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000506 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000507 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800508 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000509 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800511 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000512 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000513 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000525 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800528 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800530 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000531#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000532 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800533 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000534 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000535#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000536 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800537 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000538 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000539 if (ctxt != NULL)
540 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000555}
556
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000557/**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800565static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000566xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000568{
Daniel Veillard157fee02003-10-31 10:36:03 +0000569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000572 if (ctxt != NULL)
573 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581}
582
583/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800593static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000594xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000597 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000598
Daniel Veillard157fee02003-10-31 10:36:03 +0000599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000604 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000620}
621
622/**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000629 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000630 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800631static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000632xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000633 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000634{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000635 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000652 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000659 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000660}
661
662/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800671static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000672xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000673 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000674{
Daniel Veillard157fee02003-10-31 10:36:03 +0000675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000678 if (ctxt != NULL)
679 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000680 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000688}
689
690/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800701static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000702xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800703 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000704 const xmlChar *str2)
705{
Daniel Veillard157fee02003-10-31 10:36:03 +0000706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL)
710 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000711 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000720}
721
722/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800731static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000732xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000733 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000734{
Daniel Veillard157fee02003-10-31 10:36:03 +0000735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000738 if (ctxt != NULL)
739 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000749}
750
751/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800760static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000761xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763{
Daniel Veillard157fee02003-10-31 10:36:03 +0000764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000767 if (ctxt != NULL)
768 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773}
774
775/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800785static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000786xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000790{
Daniel Veillard157fee02003-10-31 10:36:03 +0000791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000794 if (ctxt != NULL)
795 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000802}
803
Daniel Veillard37334572008-07-31 08:20:02 +0000804/**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800812 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000813 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800814static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000815xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819{
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827}
828
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000829/************************************************************************
830 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800831 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832 * *
833 ************************************************************************/
834
835/**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845int
846xmlHasFeature(xmlFeature feature)
847{
848 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000849 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850#ifdef LIBXML_THREAD_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_TREE_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_PUSH_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_READER_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_WRITER_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_SAX1_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_FTP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_HTTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_VALID_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_HTML_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_C14N_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef LIBXML_XPATH_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_XPTR_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000951 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000952#ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000957 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000958#ifdef LIBXML_ICONV_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000963 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000964#ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000969 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000970#ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000975 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000976#ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000981 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000982#ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000987 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000988#ifdef LIBXML_EXPR_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000993 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000994#ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000999 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001000#ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001005 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001006#ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001011 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012#ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001017 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001018#ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020#else
1021 return(0);
1022#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001023 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001024#ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026#else
1027 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001028#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001029 case XML_WITH_ZLIB:
1030#ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032#else
1033 return(0);
1034#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001035 case XML_WITH_LZMA:
1036#ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001041 case XML_WITH_ICU:
1042#ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001047 default:
1048 break;
1049 }
1050 return(0);
1051}
1052
1053/************************************************************************
1054 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001055 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 * *
1057 ************************************************************************/
1058
1059/**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065static void
1066xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001068#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001072#else
1073 ctxt->sax2 = 1;
1074#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001081 xmlErrMemory(ctxt, NULL);
1082 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083}
1084
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085typedef struct _xmlDefAttrs xmlDefAttrs;
1086typedef xmlDefAttrs *xmlDefAttrsPtr;
1087struct _xmlDefAttrs {
1088 int nbAttrs; /* number of defaulted attributes on that element */
1089 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001090 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001091};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001092
1093/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001094 * xmlAttrNormalizeSpace:
1095 * @src: the source string
1096 * @dst: the target string
1097 *
1098 * Normalize the space in non CDATA attribute values:
1099 * If the attribute type is not CDATA, then the XML processor MUST further
1100 * process the normalized attribute value by discarding any leading and
1101 * trailing space (#x20) characters, and by replacing sequences of space
1102 * (#x20) characters by a single space (#x20) character.
1103 * Note that the size of dst need to be at least src, and if one doesn't need
1104 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1105 * passing src as dst is just fine.
1106 *
1107 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1108 * is needed.
1109 */
1110static xmlChar *
1111xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1112{
1113 if ((src == NULL) || (dst == NULL))
1114 return(NULL);
1115
1116 while (*src == 0x20) src++;
1117 while (*src != 0) {
1118 if (*src == 0x20) {
1119 while (*src == 0x20) src++;
1120 if (*src != 0)
1121 *dst++ = 0x20;
1122 } else {
1123 *dst++ = *src++;
1124 }
1125 }
1126 *dst = 0;
1127 if (dst == src)
1128 return(NULL);
1129 return(dst);
1130}
1131
1132/**
1133 * xmlAttrNormalizeSpace2:
1134 * @src: the source string
1135 *
1136 * Normalize the space in non CDATA attribute values, a slightly more complex
1137 * front end to avoid allocation problems when running on attribute values
1138 * coming from the input.
1139 *
1140 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1141 * is needed.
1142 */
1143static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001144xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001145{
1146 int i;
1147 int remove_head = 0;
1148 int need_realloc = 0;
1149 const xmlChar *cur;
1150
1151 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1152 return(NULL);
1153 i = *len;
1154 if (i <= 0)
1155 return(NULL);
1156
1157 cur = src;
1158 while (*cur == 0x20) {
1159 cur++;
1160 remove_head++;
1161 }
1162 while (*cur != 0) {
1163 if (*cur == 0x20) {
1164 cur++;
1165 if ((*cur == 0x20) || (*cur == 0)) {
1166 need_realloc = 1;
1167 break;
1168 }
1169 } else
1170 cur++;
1171 }
1172 if (need_realloc) {
1173 xmlChar *ret;
1174
1175 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1176 if (ret == NULL) {
1177 xmlErrMemory(ctxt, NULL);
1178 return(NULL);
1179 }
1180 xmlAttrNormalizeSpace(ret, ret);
1181 *len = (int) strlen((const char *)ret);
1182 return(ret);
1183 } else if (remove_head) {
1184 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001185 memmove(src, src + remove_head, 1 + *len);
1186 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001187 }
1188 return(NULL);
1189}
1190
1191/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 * xmlAddDefAttrs:
1193 * @ctxt: an XML parser context
1194 * @fullname: the element fullname
1195 * @fullattr: the attribute fullname
1196 * @value: the attribute value
1197 *
1198 * Add a defaulted attribute for an element
1199 */
1200static void
1201xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1202 const xmlChar *fullname,
1203 const xmlChar *fullattr,
1204 const xmlChar *value) {
1205 xmlDefAttrsPtr defaults;
1206 int len;
1207 const xmlChar *name;
1208 const xmlChar *prefix;
1209
Daniel Veillard6a31b832008-03-26 14:06:44 +00001210 /*
1211 * Allows to detect attribute redefinitions
1212 */
1213 if (ctxt->attsSpecial != NULL) {
1214 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1215 return;
1216 }
1217
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001219 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001220 if (ctxt->attsDefault == NULL)
1221 goto mem_error;
1222 }
1223
1224 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 * split the element name into prefix:localname , the string found
1226 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001227 */
1228 name = xmlSplitQName3(fullname, &len);
1229 if (name == NULL) {
1230 name = xmlDictLookup(ctxt->dict, fullname, -1);
1231 prefix = NULL;
1232 } else {
1233 name = xmlDictLookup(ctxt->dict, name, -1);
1234 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1235 }
1236
1237 /*
1238 * make sure there is some storage
1239 */
1240 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1241 if (defaults == NULL) {
1242 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001243 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001244 if (defaults == NULL)
1245 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001246 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001247 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001248 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1249 defaults, NULL) < 0) {
1250 xmlFree(defaults);
1251 goto mem_error;
1252 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001253 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001254 xmlDefAttrsPtr temp;
1255
1256 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001257 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001258 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001259 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001260 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001261 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1263 defaults, NULL) < 0) {
1264 xmlFree(defaults);
1265 goto mem_error;
1266 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001267 }
1268
1269 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001270 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001271 * are within the DTD and hen not associated to namespace names.
1272 */
1273 name = xmlSplitQName3(fullattr, &len);
1274 if (name == NULL) {
1275 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1276 prefix = NULL;
1277 } else {
1278 name = xmlDictLookup(ctxt->dict, name, -1);
1279 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1280 }
1281
Daniel Veillardae0765b2008-07-31 19:54:59 +00001282 defaults->values[5 * defaults->nbAttrs] = name;
1283 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001284 /* intern the string and precompute the end */
1285 len = xmlStrlen(value);
1286 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001287 defaults->values[5 * defaults->nbAttrs + 2] = value;
1288 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1289 if (ctxt->external)
1290 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1291 else
1292 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001293 defaults->nbAttrs++;
1294
1295 return;
1296
1297mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001298 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001299 return;
1300}
1301
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001302/**
1303 * xmlAddSpecialAttr:
1304 * @ctxt: an XML parser context
1305 * @fullname: the element fullname
1306 * @fullattr: the attribute fullname
1307 * @type: the attribute type
1308 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001309 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001310 */
1311static void
1312xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1313 const xmlChar *fullname,
1314 const xmlChar *fullattr,
1315 int type)
1316{
1317 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001318 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001319 if (ctxt->attsSpecial == NULL)
1320 goto mem_error;
1321 }
1322
Daniel Veillardac4118d2008-01-11 05:27:32 +00001323 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1324 return;
1325
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001326 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1327 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001328 return;
1329
1330mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001331 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001332 return;
1333}
1334
Daniel Veillard4432df22003-09-28 18:58:27 +00001335/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001336 * xmlCleanSpecialAttrCallback:
1337 *
1338 * Removes CDATA attributes from the special attribute table
1339 */
1340static void
1341xmlCleanSpecialAttrCallback(void *payload, void *data,
1342 const xmlChar *fullname, const xmlChar *fullattr,
1343 const xmlChar *unused ATTRIBUTE_UNUSED) {
1344 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1345
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001346 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001347 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1348 }
1349}
1350
1351/**
1352 * xmlCleanSpecialAttr:
1353 * @ctxt: an XML parser context
1354 *
1355 * Trim the list of attributes defined to remove all those of type
1356 * CDATA as they are not special. This call should be done when finishing
1357 * to parse the DTD and before starting to parse the document root.
1358 */
1359static void
1360xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1361{
1362 if (ctxt->attsSpecial == NULL)
1363 return;
1364
1365 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1366
1367 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1368 xmlHashFree(ctxt->attsSpecial, NULL);
1369 ctxt->attsSpecial = NULL;
1370 }
1371 return;
1372}
1373
1374/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001375 * xmlCheckLanguageID:
1376 * @lang: pointer to the string value
1377 *
1378 * Checks that the value conforms to the LanguageID production:
1379 *
1380 * NOTE: this is somewhat deprecated, those productions were removed from
1381 * the XML Second edition.
1382 *
1383 * [33] LanguageID ::= Langcode ('-' Subcode)*
1384 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1385 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1386 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1387 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1388 * [38] Subcode ::= ([a-z] | [A-Z])+
1389 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1391 *
1392 * http://www.rfc-editor.org/rfc/rfc5646.txt
1393 * langtag = language
1394 * ["-" script]
1395 * ["-" region]
1396 * *("-" variant)
1397 * *("-" extension)
1398 * ["-" privateuse]
1399 * language = 2*3ALPHA ; shortest ISO 639 code
1400 * ["-" extlang] ; sometimes followed by
1401 * ; extended language subtags
1402 * / 4ALPHA ; or reserved for future use
1403 * / 5*8ALPHA ; or registered language subtag
1404 *
1405 * extlang = 3ALPHA ; selected ISO 639 codes
1406 * *2("-" 3ALPHA) ; permanently reserved
1407 *
1408 * script = 4ALPHA ; ISO 15924 code
1409 *
1410 * region = 2ALPHA ; ISO 3166-1 code
1411 * / 3DIGIT ; UN M.49 code
1412 *
1413 * variant = 5*8alphanum ; registered variants
1414 * / (DIGIT 3alphanum)
1415 *
1416 * extension = singleton 1*("-" (2*8alphanum))
1417 *
1418 * ; Single alphanumerics
1419 * ; "x" reserved for private use
1420 * singleton = DIGIT ; 0 - 9
1421 * / %x41-57 ; A - W
1422 * / %x59-5A ; Y - Z
1423 * / %x61-77 ; a - w
1424 * / %x79-7A ; y - z
1425 *
1426 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1427 * The parser below doesn't try to cope with extension or privateuse
1428 * that could be added but that's not interoperable anyway
1429 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001430 * Returns 1 if correct 0 otherwise
1431 **/
1432int
1433xmlCheckLanguageID(const xmlChar * lang)
1434{
Daniel Veillard60587d62010-11-04 15:16:27 +01001435 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001436
1437 if (cur == NULL)
1438 return (0);
1439 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001440 ((cur[0] == 'I') && (cur[1] == '-')) ||
1441 ((cur[0] == 'x') && (cur[1] == '-')) ||
1442 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001443 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001444 * Still allow IANA code and user code which were coming
1445 * from the previous version of the XML-1.0 specification
1446 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001447 */
1448 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001449 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001450 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1451 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001452 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001453 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001454 nxt = cur;
1455 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1456 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1457 nxt++;
1458 if (nxt - cur >= 4) {
1459 /*
1460 * Reserved
1461 */
1462 if ((nxt - cur > 8) || (nxt[0] != 0))
1463 return(0);
1464 return(1);
1465 }
1466 if (nxt - cur < 2)
1467 return(0);
1468 /* we got an ISO 639 code */
1469 if (nxt[0] == 0)
1470 return(1);
1471 if (nxt[0] != '-')
1472 return(0);
1473
1474 nxt++;
1475 cur = nxt;
1476 /* now we can have extlang or script or region or variant */
1477 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1478 goto region_m49;
1479
1480 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1481 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1482 nxt++;
1483 if (nxt - cur == 4)
1484 goto script;
1485 if (nxt - cur == 2)
1486 goto region;
1487 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1488 goto variant;
1489 if (nxt - cur != 3)
1490 return(0);
1491 /* we parsed an extlang */
1492 if (nxt[0] == 0)
1493 return(1);
1494 if (nxt[0] != '-')
1495 return(0);
1496
1497 nxt++;
1498 cur = nxt;
1499 /* now we can have script or region or variant */
1500 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1501 goto region_m49;
1502
1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505 nxt++;
1506 if (nxt - cur == 2)
1507 goto region;
1508 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1509 goto variant;
1510 if (nxt - cur != 4)
1511 return(0);
1512 /* we parsed a script */
1513script:
1514 if (nxt[0] == 0)
1515 return(1);
1516 if (nxt[0] != '-')
1517 return(0);
1518
1519 nxt++;
1520 cur = nxt;
1521 /* now we can have region or variant */
1522 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1523 goto region_m49;
1524
1525 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1526 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1527 nxt++;
1528
1529 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1530 goto variant;
1531 if (nxt - cur != 2)
1532 return(0);
1533 /* we parsed a region */
1534region:
1535 if (nxt[0] == 0)
1536 return(1);
1537 if (nxt[0] != '-')
1538 return(0);
1539
1540 nxt++;
1541 cur = nxt;
1542 /* now we can just have a variant */
1543 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1544 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1545 nxt++;
1546
1547 if ((nxt - cur < 5) || (nxt - cur > 8))
1548 return(0);
1549
1550 /* we parsed a variant */
1551variant:
1552 if (nxt[0] == 0)
1553 return(1);
1554 if (nxt[0] != '-')
1555 return(0);
1556 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001557 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001558
1559region_m49:
1560 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1561 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1562 nxt += 3;
1563 goto region;
1564 }
1565 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001566}
1567
Owen Taylor3473f882001-02-23 17:55:21 +00001568/************************************************************************
1569 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001570 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001571 * *
1572 ************************************************************************/
1573
Daniel Veillard8ed10722009-08-20 19:17:36 +02001574static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1575 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001576
Daniel Veillard0fb18932003-09-07 09:14:37 +00001577#ifdef SAX2
1578/**
1579 * nsPush:
1580 * @ctxt: an XML parser context
1581 * @prefix: the namespace prefix or NULL
1582 * @URL: the namespace name
1583 *
1584 * Pushes a new parser namespace on top of the ns stack
1585 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001586 * Returns -1 in case of error, -2 if the namespace should be discarded
1587 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001588 */
1589static int
1590nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1591{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001592 if (ctxt->options & XML_PARSE_NSCLEAN) {
1593 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001594 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001595 if (ctxt->nsTab[i] == prefix) {
1596 /* in scope */
1597 if (ctxt->nsTab[i + 1] == URL)
1598 return(-2);
1599 /* out of scope keep it */
1600 break;
1601 }
1602 }
1603 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001604 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1605 ctxt->nsMax = 10;
1606 ctxt->nsNr = 0;
1607 ctxt->nsTab = (const xmlChar **)
1608 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1609 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001610 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001611 ctxt->nsMax = 0;
1612 return (-1);
1613 }
1614 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001615 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001616 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001617 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1618 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1619 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001620 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001621 ctxt->nsMax /= 2;
1622 return (-1);
1623 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001624 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001625 }
1626 ctxt->nsTab[ctxt->nsNr++] = prefix;
1627 ctxt->nsTab[ctxt->nsNr++] = URL;
1628 return (ctxt->nsNr);
1629}
1630/**
1631 * nsPop:
1632 * @ctxt: an XML parser context
1633 * @nr: the number to pop
1634 *
1635 * Pops the top @nr parser prefix/namespace from the ns stack
1636 *
1637 * Returns the number of namespaces removed
1638 */
1639static int
1640nsPop(xmlParserCtxtPtr ctxt, int nr)
1641{
1642 int i;
1643
1644 if (ctxt->nsTab == NULL) return(0);
1645 if (ctxt->nsNr < nr) {
1646 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1647 nr = ctxt->nsNr;
1648 }
1649 if (ctxt->nsNr <= 0)
1650 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001651
Daniel Veillard0fb18932003-09-07 09:14:37 +00001652 for (i = 0;i < nr;i++) {
1653 ctxt->nsNr--;
1654 ctxt->nsTab[ctxt->nsNr] = NULL;
1655 }
1656 return(nr);
1657}
1658#endif
1659
1660static int
1661xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1662 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001663 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001664 int maxatts;
1665
1666 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001667 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001668 atts = (const xmlChar **)
1669 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001670 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001671 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001672 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1673 if (attallocs == NULL) goto mem_error;
1674 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001675 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 } else if (nr + 5 > ctxt->maxatts) {
1677 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001678 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1679 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001680 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001681 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001682 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1683 (maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001686 ctxt->maxatts = maxatts;
1687 }
1688 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001689mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001690 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001691 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692}
1693
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001694/**
1695 * inputPush:
1696 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001697 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 *
1699 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001700 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001701 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001702 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001703int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001704inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1705{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001706 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001707 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 if (ctxt->inputNr >= ctxt->inputMax) {
1709 ctxt->inputMax *= 2;
1710 ctxt->inputTab =
1711 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712 ctxt->inputMax *
1713 sizeof(ctxt->inputTab[0]));
1714 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001715 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001716 xmlFreeInputStream(value);
1717 ctxt->inputMax /= 2;
1718 value = NULL;
1719 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001720 }
1721 }
1722 ctxt->inputTab[ctxt->inputNr] = value;
1723 ctxt->input = value;
1724 return (ctxt->inputNr++);
1725}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001726/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001728 * @ctxt: an XML parser context
1729 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001731 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001732 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001733 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001734xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735inputPop(xmlParserCtxtPtr ctxt)
1736{
1737 xmlParserInputPtr ret;
1738
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001739 if (ctxt == NULL)
1740 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001742 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001743 ctxt->inputNr--;
1744 if (ctxt->inputNr > 0)
1745 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1746 else
1747 ctxt->input = NULL;
1748 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001749 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001750 return (ret);
1751}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001752/**
1753 * nodePush:
1754 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001755 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001756 *
1757 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001758 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001759 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001760 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001761int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001762nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1763{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001764 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001766 xmlNodePtr *tmp;
1767
1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001771 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001772 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001773 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001774 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001775 ctxt->nodeTab = tmp;
1776 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001777 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001782 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001783 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001784 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001785 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001786 ctxt->nodeTab[ctxt->nodeNr] = value;
1787 ctxt->node = value;
1788 return (ctxt->nodeNr++);
1789}
Daniel Veillard8915c152008-08-26 13:05:34 +00001790
Daniel Veillard1c732d22002-11-30 11:22:59 +00001791/**
1792 * nodePop:
1793 * @ctxt: an XML parser context
1794 *
1795 * Pops the top element node from the node stack
1796 *
1797 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001798 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001799xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001800nodePop(xmlParserCtxtPtr ctxt)
1801{
1802 xmlNodePtr ret;
1803
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001804 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001805 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001806 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001807 ctxt->nodeNr--;
1808 if (ctxt->nodeNr > 0)
1809 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1810 else
1811 ctxt->node = NULL;
1812 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001813 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001814 return (ret);
1815}
Daniel Veillarda2351322004-06-27 12:08:10 +00001816
1817#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001819 * nameNsPush:
1820 * @ctxt: an XML parser context
1821 * @value: the element name
1822 * @prefix: the element prefix
1823 * @URI: the element namespace name
1824 *
1825 * Pushes a new element name/prefix/URL on top of the name stack
1826 *
1827 * Returns -1 in case of error, the index in the stack otherwise
1828 */
1829static int
1830nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1831 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1832{
1833 if (ctxt->nameNr >= ctxt->nameMax) {
1834 const xmlChar * *tmp;
1835 void **tmp2;
1836 ctxt->nameMax *= 2;
1837 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1838 ctxt->nameMax *
1839 sizeof(ctxt->nameTab[0]));
1840 if (tmp == NULL) {
1841 ctxt->nameMax /= 2;
1842 goto mem_error;
1843 }
1844 ctxt->nameTab = tmp;
1845 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1846 ctxt->nameMax * 3 *
1847 sizeof(ctxt->pushTab[0]));
1848 if (tmp2 == NULL) {
1849 ctxt->nameMax /= 2;
1850 goto mem_error;
1851 }
1852 ctxt->pushTab = tmp2;
1853 }
1854 ctxt->nameTab[ctxt->nameNr] = value;
1855 ctxt->name = value;
1856 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1857 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001858 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001859 return (ctxt->nameNr++);
1860mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001861 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 return (-1);
1863}
1864/**
1865 * nameNsPop:
1866 * @ctxt: an XML parser context
1867 *
1868 * Pops the top element/prefix/URI name from the name stack
1869 *
1870 * Returns the name just removed
1871 */
1872static const xmlChar *
1873nameNsPop(xmlParserCtxtPtr ctxt)
1874{
1875 const xmlChar *ret;
1876
1877 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001878 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001879 ctxt->nameNr--;
1880 if (ctxt->nameNr > 0)
1881 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1882 else
1883 ctxt->name = NULL;
1884 ret = ctxt->nameTab[ctxt->nameNr];
1885 ctxt->nameTab[ctxt->nameNr] = NULL;
1886 return (ret);
1887}
Daniel Veillarda2351322004-06-27 12:08:10 +00001888#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001889
1890/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001891 * namePush:
1892 * @ctxt: an XML parser context
1893 * @value: the element name
1894 *
1895 * Pushes a new element name on top of the name stack
1896 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001897 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001898 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001899int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001900namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001901{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001902 if (ctxt == NULL) return (-1);
1903
Daniel Veillard1c732d22002-11-30 11:22:59 +00001904 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001905 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001906 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001907 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001908 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001909 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001910 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001911 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001912 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001913 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001914 }
1915 ctxt->nameTab[ctxt->nameNr] = value;
1916 ctxt->name = value;
1917 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001918mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001919 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001920 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001921}
1922/**
1923 * namePop:
1924 * @ctxt: an XML parser context
1925 *
1926 * Pops the top element name from the name stack
1927 *
1928 * Returns the name just removed
1929 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001930const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001931namePop(xmlParserCtxtPtr ctxt)
1932{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001933 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001934
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001935 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1936 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001937 ctxt->nameNr--;
1938 if (ctxt->nameNr > 0)
1939 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1940 else
1941 ctxt->name = NULL;
1942 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001943 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001944 return (ret);
1945}
Owen Taylor3473f882001-02-23 17:55:21 +00001946
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001947static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001948 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001949 int *tmp;
1950
Owen Taylor3473f882001-02-23 17:55:21 +00001951 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001952 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1953 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1954 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001955 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001956 ctxt->spaceMax /=2;
1957 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001958 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001959 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001960 }
1961 ctxt->spaceTab[ctxt->spaceNr] = val;
1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1963 return(ctxt->spaceNr++);
1964}
1965
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001966static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001967 int ret;
1968 if (ctxt->spaceNr <= 0) return(0);
1969 ctxt->spaceNr--;
1970 if (ctxt->spaceNr > 0)
1971 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1972 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001973 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001974 ret = ctxt->spaceTab[ctxt->spaceNr];
1975 ctxt->spaceTab[ctxt->spaceNr] = -1;
1976 return(ret);
1977}
1978
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2013
/*
 * Raw accessors to the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */

/* pointers to the current position and to the start of the buffer */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* the xmlChar at the current position; both names expand identically */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)

/* the xmlChar located val positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
Owen Taylor3473f882001-02-23 17:55:21 +00002019
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal,
 * in order, to c1 ... cn. Bytes are read as unsigned char so values
 * above 0x7F compare as expected whatever the signedness of char.
 *
 * s and every cN are fully parenthesized in the expansion, so any
 * expression can be passed. s is evaluated once per compared byte (and
 * possibly fewer times, the comparison stops at the first mismatch):
 * do not pass an expression with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2037
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters accordingly (the line counter is not
 * touched). A '%' found at the new position is handed to
 * xmlParserHandlePEReference(); when the new position holds a 0 and the
 * input cannot be grown the input is popped.
 * val is evaluated three times. Expects ctxt to be in scope.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
2045
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping the line and column counters right when newlines are crossed
 * and counting every skipped xmlChar in ctxt->nbChars. A '%' found at
 * the new position is handed to xmlParserHandlePEReference(); when the
 * new position holds a 0 and the input cannot be grown the input is
 * popped.
 * val is parenthesized in the loop bound so any int expression can be
 * passed; it is evaluated once per loop test. Expects ctxt to be in scope.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
2060
Daniel Veillarda880b122003-04-21 21:36:41 +00002061#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002062 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2063 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002064 xmlSHRINK (ctxt);
2065
2066static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2067 xmlParserInputShrink(ctxt->input);
2068 if ((*ctxt->input->cur == 0) &&
2069 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2070 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002071 }
Owen Taylor3473f882001-02-23 17:55:21 +00002072
Daniel Veillarda880b122003-04-21 21:36:41 +00002073#define GROW if ((ctxt->progressive == 0) && \
2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002075 xmlGROW (ctxt);
2076
2077static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080
2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002083 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002084 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002086 xmlHaltParser(ctxt);
2087 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002088 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002089 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002090 if ((ctxt->input->cur > ctxt->input->end) ||
2091 (ctxt->input->cur < ctxt->input->base)) {
2092 xmlHaltParser(ctxt);
2093 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2094 return;
2095 }
Daniel Veillard59df7832010-02-02 10:24:01 +01002096 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002097 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2098 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002099}
Owen Taylor3473f882001-02-23 17:55:21 +00002100
/* shorthands for the character level routines, ctxt must be in scope */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it as one character
 * and one column (the line counter is not touched); when the new
 * position holds a 0 an attempt is made to grow the input.
 * The expansion is a brace enclosed block, not a do/while statement.
 */
#define NEXT1 { \
    ctxt->nbChars++; \
    ctxt->input->cur++; \
    ctxt->input->col++; \
    if (*ctxt->input->cur == 0) \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
  }
2112
/*
 * NEXTL(l): step over the current character which is l xmlChars long.
 * A newline moves to column 1 of the next line, anything else counts
 * as a single column. A '%' found at the new position is handed to
 * xmlParserHandlePEReference(). Expects ctxt to be in scope.
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) != '\n') { \
        ctxt->input->col++; \
    } else { \
        ctxt->input->line++; \
        ctxt->input->col = 1; \
    } \
    ctxt->input->cur += l; \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
  } while (0)

/*
 * Shorthands passing the address of l to the routine fetching the
 * current character from the context, respectively from the string s.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2123
/*
 * COPY_BUF(l, b, i, v): append the character v to the buffer b at index
 * i and advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is written by xmlCopyCharMultiByte() and i is advanced
 * by the value that routine returns.
 *
 * All arguments are parenthesized in the expansion so expressions can
 * be passed safely, and the whole is a single do/while statement.
 * i must be a modifiable lvalue; it is evaluated more than once.
 */
#define COPY_BUF(l,b,i,v) \
    do { \
        if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
        else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v)); \
    } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00002127
2128/**
2129 * xmlSkipBlankChars:
2130 * @ctxt: the XML parser context
2131 *
2132 * skip all blanks character found at that point in the input streams.
2133 * It pops up finished entities in the process if allowable at that point.
2134 *
2135 * Returns the number of space chars skipped
2136 */
2137
2138int
2139xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002140 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002141
2142 /*
2143 * It's Okay to use CUR/NEXT here since all the blanks are on
2144 * the ASCII range.
2145 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002146 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2147 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002148 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002149 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002150 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002151 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002152 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002153 if (*cur == '\n') {
2154 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002155 } else {
2156 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002157 }
2158 cur++;
2159 res++;
2160 if (*cur == 0) {
2161 ctxt->input->cur = cur;
2162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2163 cur = ctxt->input->cur;
2164 }
2165 }
2166 ctxt->input->cur = cur;
2167 } else {
2168 int cur;
2169 do {
2170 cur = CUR;
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002171 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2172 (ctxt->instate != XML_PARSER_EOF))) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002173 NEXT;
2174 cur = CUR;
2175 res++;
2176 }
2177 while ((cur == 0) && (ctxt->inputNr > 1) &&
2178 (ctxt->instate != XML_PARSER_COMMENT)) {
2179 xmlPopInput(ctxt);
2180 cur = CUR;
2181 }
2182 /*
2183 * Need to handle support of entities branching here
2184 */
2185 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002186 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2187 (ctxt->instate != XML_PARSER_EOF));
Daniel Veillard02141ea2001-04-30 11:46:40 +00002188 }
Owen Taylor3473f882001-02-23 17:55:21 +00002189 return(res);
2190}
2191
2192/************************************************************************
2193 * *
2194 * Commodity functions to handle entities *
2195 * *
2196 ************************************************************************/
2197
2198/**
2199 * xmlPopInput:
2200 * @ctxt: an XML parser context
2201 *
2202 * xmlPopInput: the current input pointed by ctxt->input came to an end
2203 * pop it and return the next char.
2204 *
2205 * Returns the current xmlChar in the parser context
2206 */
2207xmlChar
2208xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002209 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002210 if (xmlParserDebugEntities)
2211 xmlGenericError(xmlGenericErrorContext,
2212 "Popping input %d\n", ctxt->inputNr);
2213 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002214 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002215 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2216 return(xmlPopInput(ctxt));
2217 return(CUR);
2218}
2219
2220/**
2221 * xmlPushInput:
2222 * @ctxt: an XML parser context
2223 * @input: an XML parser input fragment (entity, XML fragment ...).
2224 *
2225 * xmlPushInput: switch to a new input stream which is stacked on top
2226 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002227 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002228 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002229int
Owen Taylor3473f882001-02-23 17:55:21 +00002230xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002231 int ret;
2232 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002233
2234 if (xmlParserDebugEntities) {
2235 if ((ctxt->input != NULL) && (ctxt->input->filename))
2236 xmlGenericError(xmlGenericErrorContext,
2237 "%s(%d): ", ctxt->input->filename,
2238 ctxt->input->line);
2239 xmlGenericError(xmlGenericErrorContext,
2240 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2241 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002242 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002243 if (ctxt->instate == XML_PARSER_EOF)
2244 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002245 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002246 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002247}
2248
2249/**
2250 * xmlParseCharRef:
2251 * @ctxt: an XML parser context
2252 *
2253 * parse Reference declarations
2254 *
2255 * [66] CharRef ::= '&#' [0-9]+ ';' |
2256 * '&#x' [0-9a-fA-F]+ ';'
2257 *
2258 * [ WFC: Legal Character ]
2259 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002260 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002261 *
2262 * Returns the value parsed (as an int), 0 in case of error
2263 */
2264int
2265xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002266 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002267 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002268 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002269
Owen Taylor3473f882001-02-23 17:55:21 +00002270 /*
2271 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2272 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002273 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002274 (NXT(2) == 'x')) {
2275 SKIP(3);
2276 GROW;
2277 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002278 if (count++ > 20) {
2279 count = 0;
2280 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002281 if (ctxt->instate == XML_PARSER_EOF)
2282 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002283 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002284 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002285 val = val * 16 + (CUR - '0');
2286 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2287 val = val * 16 + (CUR - 'a') + 10;
2288 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2289 val = val * 16 + (CUR - 'A') + 10;
2290 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002291 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002292 val = 0;
2293 break;
2294 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002295 if (val > 0x10FFFF)
2296 outofrange = val;
2297
Owen Taylor3473f882001-02-23 17:55:21 +00002298 NEXT;
2299 count++;
2300 }
2301 if (RAW == ';') {
2302 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002303 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002304 ctxt->nbChars ++;
2305 ctxt->input->cur++;
2306 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002307 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002308 SKIP(2);
2309 GROW;
2310 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002311 if (count++ > 20) {
2312 count = 0;
2313 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002314 if (ctxt->instate == XML_PARSER_EOF)
2315 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002316 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002317 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002318 val = val * 10 + (CUR - '0');
2319 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002320 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002321 val = 0;
2322 break;
2323 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002324 if (val > 0x10FFFF)
2325 outofrange = val;
2326
Owen Taylor3473f882001-02-23 17:55:21 +00002327 NEXT;
2328 count++;
2329 }
2330 if (RAW == ';') {
2331 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002332 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002333 ctxt->nbChars ++;
2334 ctxt->input->cur++;
2335 }
2336 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002337 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002338 }
2339
2340 /*
2341 * [ WFC: Legal Character ]
2342 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002343 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002344 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002345 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002346 return(val);
2347 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002348 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2349 "xmlParseCharRef: invalid xmlChar value %d\n",
2350 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002351 }
2352 return(0);
2353}
2354
2355/**
2356 * xmlParseStringCharRef:
2357 * @ctxt: an XML parser context
2358 * @str: a pointer to an index in the string
2359 *
2360 * parse Reference declarations, variant parsing from a string rather
2361 * than an an input flow.
2362 *
2363 * [66] CharRef ::= '&#' [0-9]+ ';' |
2364 * '&#x' [0-9a-fA-F]+ ';'
2365 *
2366 * [ WFC: Legal Character ]
2367 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002368 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002369 *
2370 * Returns the value parsed (as an int), 0 in case of error, str will be
2371 * updated to the current value of the index
2372 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002373static int
Owen Taylor3473f882001-02-23 17:55:21 +00002374xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2375 const xmlChar *ptr;
2376 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002377 unsigned int val = 0;
2378 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002379
2380 if ((str == NULL) || (*str == NULL)) return(0);
2381 ptr = *str;
2382 cur = *ptr;
2383 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2384 ptr += 3;
2385 cur = *ptr;
2386 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002387 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002388 val = val * 16 + (cur - '0');
2389 else if ((cur >= 'a') && (cur <= 'f'))
2390 val = val * 16 + (cur - 'a') + 10;
2391 else if ((cur >= 'A') && (cur <= 'F'))
2392 val = val * 16 + (cur - 'A') + 10;
2393 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002394 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002395 val = 0;
2396 break;
2397 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002398 if (val > 0x10FFFF)
2399 outofrange = val;
2400
Owen Taylor3473f882001-02-23 17:55:21 +00002401 ptr++;
2402 cur = *ptr;
2403 }
2404 if (cur == ';')
2405 ptr++;
2406 } else if ((cur == '&') && (ptr[1] == '#')){
2407 ptr += 2;
2408 cur = *ptr;
2409 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002410 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002411 val = val * 10 + (cur - '0');
2412 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002413 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002414 val = 0;
2415 break;
2416 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002417 if (val > 0x10FFFF)
2418 outofrange = val;
2419
Owen Taylor3473f882001-02-23 17:55:21 +00002420 ptr++;
2421 cur = *ptr;
2422 }
2423 if (cur == ';')
2424 ptr++;
2425 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002426 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002427 return(0);
2428 }
2429 *str = ptr;
2430
2431 /*
2432 * [ WFC: Legal Character ]
2433 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002434 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002435 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002436 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002437 return(val);
2438 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002439 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2440 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2441 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 }
2443 return(0);
2444}
2445
2446/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002447 * xmlNewBlanksWrapperInputStream:
2448 * @ctxt: an XML parser context
2449 * @entity: an Entity pointer
2450 *
2451 * Create a new input stream for wrapping
2452 * blanks around a PEReference
2453 *
2454 * Returns the new input stream or NULL
2455 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002456
Daniel Veillardf5582f12002-06-11 10:08:16 +00002457static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002458
Daniel Veillardf4862f02002-09-10 11:13:43 +00002459static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002460xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2461 xmlParserInputPtr input;
2462 xmlChar *buffer;
2463 size_t length;
2464 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002465 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2466 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002467 return(NULL);
2468 }
2469 if (xmlParserDebugEntities)
2470 xmlGenericError(xmlGenericErrorContext,
2471 "new blanks wrapper for entity: %s\n", entity->name);
2472 input = xmlNewInputStream(ctxt);
2473 if (input == NULL) {
2474 return(NULL);
2475 }
2476 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002477 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002478 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002479 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002480 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002481 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002482 }
2483 buffer [0] = ' ';
2484 buffer [1] = '%';
2485 buffer [length-3] = ';';
2486 buffer [length-2] = ' ';
2487 buffer [length-1] = 0;
2488 memcpy(buffer + 2, entity->name, length - 5);
2489 input->free = deallocblankswrapper;
2490 input->base = buffer;
2491 input->cur = buffer;
2492 input->length = length;
2493 input->end = &buffer[length];
2494 return(input);
2495}
2496
2497/**
Owen Taylor3473f882001-02-23 17:55:21 +00002498 * xmlParserHandlePEReference:
2499 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002500 *
Owen Taylor3473f882001-02-23 17:55:21 +00002501 * [69] PEReference ::= '%' Name ';'
2502 *
2503 * [ WFC: No Recursion ]
2504 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002505 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002506 *
2507 * [ WFC: Entity Declared ]
2508 * In a document without any DTD, a document with only an internal DTD
2509 * subset which contains no parameter entity references, or a document
2510 * with "standalone='yes'", ... ... The declaration of a parameter
2511 * entity must precede any reference to it...
2512 *
2513 * [ VC: Entity Declared ]
2514 * In a document with an external subset or external parameter entities
2515 * with "standalone='no'", ... ... The declaration of a parameter entity
2516 * must precede any reference to it...
2517 *
2518 * [ WFC: In DTD ]
2519 * Parameter-entity references may only appear in the DTD.
2520 * NOTE: misleading but this is handled.
2521 *
2522 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002523 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002524 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002525 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002526 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002527 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002528 */
2529void
2530xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002531 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 xmlEntityPtr entity = NULL;
2533 xmlParserInputPtr input;
2534
Owen Taylor3473f882001-02-23 17:55:21 +00002535 if (RAW != '%') return;
2536 switch(ctxt->instate) {
2537 case XML_PARSER_CDATA_SECTION:
2538 return;
2539 case XML_PARSER_COMMENT:
2540 return;
2541 case XML_PARSER_START_TAG:
2542 return;
2543 case XML_PARSER_END_TAG:
2544 return;
2545 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002546 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002547 return;
2548 case XML_PARSER_PROLOG:
2549 case XML_PARSER_START:
2550 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002551 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002552 return;
2553 case XML_PARSER_ENTITY_DECL:
2554 case XML_PARSER_CONTENT:
2555 case XML_PARSER_ATTRIBUTE_VALUE:
2556 case XML_PARSER_PI:
2557 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002558 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002559 /* we just ignore it there */
2560 return;
2561 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002562 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002563 return;
2564 case XML_PARSER_ENTITY_VALUE:
2565 /*
2566 * NOTE: in the case of entity values, we don't do the
2567 * substitution here since we need the literal
2568 * entity value to be able to save the internal
2569 * subset of the document.
2570 * This will be handled by xmlStringDecodeEntities
2571 */
2572 return;
2573 case XML_PARSER_DTD:
2574 /*
2575 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2576 * In the internal DTD subset, parameter-entity references
2577 * can occur only where markup declarations can occur, not
2578 * within markup declarations.
2579 * In that case this is handled in xmlParseMarkupDecl
2580 */
2581 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2582 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002583 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002584 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002585 break;
2586 case XML_PARSER_IGNORE:
2587 return;
2588 }
2589
2590 NEXT;
2591 name = xmlParseName(ctxt);
2592 if (xmlParserDebugEntities)
2593 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002594 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002595 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002596 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002597 } else {
2598 if (RAW == ';') {
2599 NEXT;
2600 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2601 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002602 if (ctxt->instate == XML_PARSER_EOF)
2603 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002604 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002605
Owen Taylor3473f882001-02-23 17:55:21 +00002606 /*
2607 * [ WFC: Entity Declared ]
2608 * In a document without any DTD, a document with only an
2609 * internal DTD subset which contains no parameter entity
2610 * references, or a document with "standalone='yes'", ...
2611 * ... The declaration of a parameter entity must precede
2612 * any reference to it...
2613 */
2614 if ((ctxt->standalone == 1) ||
2615 ((ctxt->hasExternalSubset == 0) &&
2616 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002617 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002618 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002619 } else {
2620 /*
2621 * [ VC: Entity Declared ]
2622 * In a document with an external subset or external
2623 * parameter entities with "standalone='no'", ...
2624 * ... The declaration of a parameter entity must precede
2625 * any reference to it...
2626 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002627 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2628 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2629 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002630 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002631 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002632 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2633 "PEReference: %%%s; not found\n",
2634 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002635 ctxt->valid = 0;
2636 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002637 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002638 } else if (ctxt->input->free != deallocblankswrapper) {
2639 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002640 if (xmlPushInput(ctxt, input) < 0)
2641 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002642 } else {
2643 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2644 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002645 xmlChar start[4];
2646 xmlCharEncoding enc;
2647
Owen Taylor3473f882001-02-23 17:55:21 +00002648 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002649 * Note: external parameter entities will not be loaded, it
2650 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002651 * option of validating, or substituting entities were
2652 * given. Doing so is far more secure as the parser will
2653 * only process data coming from the document entity by
2654 * default.
2655 */
2656 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2657 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2658 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002659 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2660 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2661 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002662 (ctxt->validate == 0))
2663 return;
2664
2665 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002666 * handle the extra spaces added before and after
2667 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002668 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002669 */
2670 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002671 if (xmlPushInput(ctxt, input) < 0)
2672 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002673
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002674 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002675 * Get the 4 first bytes and decode the charset
2676 * if enc != XML_CHAR_ENCODING_NONE
2677 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002678 * Note that, since we may have some non-UTF8
2679 * encoding (like UTF16, bug 135229), the 'length'
2680 * is not known, but we can calculate based upon
2681 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002682 */
2683 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002684 if (ctxt->instate == XML_PARSER_EOF)
2685 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002686 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002687 start[0] = RAW;
2688 start[1] = NXT(1);
2689 start[2] = NXT(2);
2690 start[3] = NXT(3);
2691 enc = xmlDetectCharEncoding(start, 4);
2692 if (enc != XML_CHAR_ENCODING_NONE) {
2693 xmlSwitchEncoding(ctxt, enc);
2694 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002695 }
2696
Owen Taylor3473f882001-02-23 17:55:21 +00002697 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002698 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2699 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002700 xmlParseTextDecl(ctxt);
2701 }
Owen Taylor3473f882001-02-23 17:55:21 +00002702 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002703 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2704 "PEReference: %s is not a parameter entity\n",
2705 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002706 }
2707 }
2708 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002709 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002710 }
Owen Taylor3473f882001-02-23 17:55:21 +00002711 }
2712}
2713
/*
 * Macro used to grow the current buffer: the new size is twice the
 * current one plus @n bytes.
 * buffer##_size is expected to be a size_t holding the current size
 * mem_error: is a label expected in the calling function, it has to
 * handle memory allocation failures; it is also reached when the new
 * size computation wraps around. On failure the buffer and its size are
 * left unchanged.
 *
 * NOTE: this expands to a bare { } block, not to do { } while (0).
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2728
2729/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002730 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002731 * @ctxt: the parser context
2732 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002733 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002734 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2735 * @end: an end marker xmlChar, 0 if none
2736 * @end2: an end marker xmlChar, 0 if none
2737 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002738 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002739 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002740 *
2741 * [67] Reference ::= EntityRef | CharRef
2742 *
2743 * [69] PEReference ::= '%' Name ';'
2744 *
2745 * Returns A newly allocated string with the substitution done. The caller
2746 * must deallocate it !
2747 */
2748xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002749xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2750 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002751 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002752 size_t buffer_size = 0;
2753 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002754
2755 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002756 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002757 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002758 xmlEntityPtr ent;
2759 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002760
Daniel Veillarda82b1822004-11-08 16:24:57 +00002761 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002762 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002763 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002764
Daniel Veillard0161e632008-08-28 15:36:32 +00002765 if (((ctxt->depth > 40) &&
2766 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2767 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002768 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002769 return(NULL);
2770 }
2771
2772 /*
2773 * allocate a translation buffer.
2774 */
2775 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002776 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002777 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002778
2779 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002780 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002781 * we are operating on already parsed values.
2782 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002783 if (str < last)
2784 c = CUR_SCHAR(str, l);
2785 else
2786 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002787 while ((c != 0) && (c != end) && /* non input consuming loop */
2788 (c != end2) && (c != end3)) {
2789
2790 if (c == 0) break;
2791 if ((c == '&') && (str[1] == '#')) {
2792 int val = xmlParseStringCharRef(ctxt, &str);
2793 if (val != 0) {
2794 COPY_BUF(0,buffer,nbchars,val);
2795 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002798 }
Owen Taylor3473f882001-02-23 17:55:21 +00002799 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2800 if (xmlParserDebugEntities)
2801 xmlGenericError(xmlGenericErrorContext,
2802 "String decoding Entity Reference: %.30s\n",
2803 str);
2804 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002805 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2806 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002807 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002808 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002809 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002810 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002811 if ((ent != NULL) &&
2812 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2813 if (ent->content != NULL) {
2814 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002815 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002817 }
Owen Taylor3473f882001-02-23 17:55:21 +00002818 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002819 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2820 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002821 }
2822 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002823 ctxt->depth++;
2824 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2825 0, 0, 0);
2826 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002827
David Drysdale69030712015-11-20 11:13:45 +08002828 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2829 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2830 goto int_error;
2831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (rep != NULL) {
2833 current = rep;
2834 while (*current != 0) { /* non input consuming loop */
2835 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002836 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002837 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002838 goto int_error;
2839 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002840 }
2841 }
2842 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002843 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002844 }
2845 } else if (ent != NULL) {
2846 int i = xmlStrlen(ent->name);
2847 const xmlChar *cur = ent->name;
2848
2849 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002850 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002851 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 }
2853 for (;i > 0;i--)
2854 buffer[nbchars++] = *cur++;
2855 buffer[nbchars++] = ';';
2856 }
2857 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2858 if (xmlParserDebugEntities)
2859 xmlGenericError(xmlGenericErrorContext,
2860 "String decoding PE Reference: %.30s\n", str);
2861 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002862 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2863 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002864 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002865 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002866 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002867 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002868 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002869 /*
2870 * Note: external parsed entities will not be loaded,
2871 * it is not required for a non-validating parser to
2872 * complete external PEreferences coming from the
2873 * internal subset
2874 */
2875 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2876 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2877 (ctxt->validate != 0)) {
2878 xmlLoadEntityContent(ctxt, ent);
2879 } else {
2880 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2881 "not validating will not read content for PE entity %s\n",
2882 ent->name, NULL);
2883 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002884 }
Owen Taylor3473f882001-02-23 17:55:21 +00002885 ctxt->depth++;
2886 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2887 0, 0, 0);
2888 ctxt->depth--;
2889 if (rep != NULL) {
2890 current = rep;
2891 while (*current != 0) { /* non input consuming loop */
2892 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002893 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002894 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002895 goto int_error;
2896 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002897 }
2898 }
2899 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002900 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002901 }
2902 }
2903 } else {
2904 COPY_BUF(l,buffer,nbchars,c);
2905 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002906 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2907 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002908 }
2909 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002910 if (str < last)
2911 c = CUR_SCHAR(str, l);
2912 else
2913 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002914 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002915 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002916 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002917
2918mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002919 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002920int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002921 if (rep != NULL)
2922 xmlFree(rep);
2923 if (buffer != NULL)
2924 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002925 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002926}
2927
Daniel Veillarde57ec792003-09-10 10:50:59 +00002928/**
2929 * xmlStringDecodeEntities:
2930 * @ctxt: the parser context
2931 * @str: the input string
2932 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2933 * @end: an end marker xmlChar, 0 if none
2934 * @end2: an end marker xmlChar, 0 if none
2935 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002936 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002937 * Takes a entity string content and process to do the adequate substitutions.
2938 *
2939 * [67] Reference ::= EntityRef | CharRef
2940 *
2941 * [69] PEReference ::= '%' Name ';'
2942 *
2943 * Returns A newly allocated string with the substitution done. The caller
2944 * must deallocate it !
2945 */
2946xmlChar *
2947xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2948 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002949 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002950 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2951 end, end2, end3));
2952}
Owen Taylor3473f882001-02-23 17:55:21 +00002953
2954/************************************************************************
2955 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002956 * Commodity functions, cleanup needed ? *
2957 * *
2958 ************************************************************************/
2959
2960/**
2961 * areBlanks:
2962 * @ctxt: an XML parser context
2963 * @str: a xmlChar *
2964 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002965 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002966 *
2967 * Is this a sequence of blank chars that one can ignore ?
2968 *
2969 * Returns 1 if ignorable 0 otherwise.
2970 */
2971
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002972static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2973 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002974 int i, ret;
2975 xmlNodePtr lastChild;
2976
Daniel Veillard05c13a22001-09-09 08:38:09 +00002977 /*
2978 * Don't spend time trying to differentiate them, the same callback is
2979 * used !
2980 */
2981 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002982 return(0);
2983
Owen Taylor3473f882001-02-23 17:55:21 +00002984 /*
2985 * Check for xml:space value.
2986 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002987 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2988 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002989 return(0);
2990
2991 /*
2992 * Check that the string is made of blanks
2993 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002994 if (blank_chars == 0) {
2995 for (i = 0;i < len;i++)
2996 if (!(IS_BLANK_CH(str[i]))) return(0);
2997 }
Owen Taylor3473f882001-02-23 17:55:21 +00002998
2999 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003000 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00003001 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00003002 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003003 if (ctxt->myDoc != NULL) {
3004 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3005 if (ret == 0) return(1);
3006 if (ret == 1) return(0);
3007 }
3008
3009 /*
3010 * Otherwise, heuristic :-\
3011 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00003012 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003013 if ((ctxt->node->children == NULL) &&
3014 (RAW == '<') && (NXT(1) == '/')) return(0);
3015
3016 lastChild = xmlGetLastChild(ctxt->node);
3017 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00003018 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3019 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003020 } else if (xmlNodeIsText(lastChild))
3021 return(0);
3022 else if ((ctxt->node->children != NULL) &&
3023 (xmlNodeIsText(ctxt->node->children)))
3024 return(0);
3025 return(1);
3026}
3027
Owen Taylor3473f882001-02-23 17:55:21 +00003028/************************************************************************
3029 * *
3030 * Extra stuff for namespace support *
3031 * Relates to http://www.w3.org/TR/WD-xml-names *
3032 * *
3033 ************************************************************************/
3034
3035/**
3036 * xmlSplitQName:
3037 * @ctxt: an XML parser context
3038 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003039 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00003040 *
3041 * parse an UTF8 encoded XML qualified name string
3042 *
3043 * [NS 5] QName ::= (Prefix ':')? LocalPart
3044 *
3045 * [NS 6] Prefix ::= NCName
3046 *
3047 * [NS 7] LocalPart ::= NCName
3048 *
3049 * Returns the local part, and prefix is updated
3050 * to get the Prefix if any.
3051 */
3052
3053xmlChar *
3054xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3055 xmlChar buf[XML_MAX_NAMELEN + 5];
3056 xmlChar *buffer = NULL;
3057 int len = 0;
3058 int max = XML_MAX_NAMELEN;
3059 xmlChar *ret = NULL;
3060 const xmlChar *cur = name;
3061 int c;
3062
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003063 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003064 *prefix = NULL;
3065
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00003066 if (cur == NULL) return(NULL);
3067
Owen Taylor3473f882001-02-23 17:55:21 +00003068#ifndef XML_XML_NAMESPACE
3069 /* xml: prefix is not really a namespace */
3070 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3071 (cur[2] == 'l') && (cur[3] == ':'))
3072 return(xmlStrdup(name));
3073#endif
3074
Daniel Veillard597bc482003-07-24 16:08:28 +00003075 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00003076 if (cur[0] == ':')
3077 return(xmlStrdup(name));
3078
3079 c = *cur++;
3080 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3081 buf[len++] = c;
3082 c = *cur++;
3083 }
3084 if (len >= max) {
3085 /*
3086 * Okay someone managed to make a huge name, so he's ready to pay
3087 * for the processing speed.
3088 */
3089 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003090
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003091 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003092 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003093 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003094 return(NULL);
3095 }
3096 memcpy(buffer, buf, len);
3097 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3098 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003099 xmlChar *tmp;
3100
Owen Taylor3473f882001-02-23 17:55:21 +00003101 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003102 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003103 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003104 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003105 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003106 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003107 return(NULL);
3108 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003109 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003110 }
3111 buffer[len++] = c;
3112 c = *cur++;
3113 }
3114 buffer[len] = 0;
3115 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003116
Daniel Veillard597bc482003-07-24 16:08:28 +00003117 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003118 if (buffer != NULL)
3119 xmlFree(buffer);
3120 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003121 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003122 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003123
Owen Taylor3473f882001-02-23 17:55:21 +00003124 if (buffer == NULL)
3125 ret = xmlStrndup(buf, len);
3126 else {
3127 ret = buffer;
3128 buffer = NULL;
3129 max = XML_MAX_NAMELEN;
3130 }
3131
3132
3133 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003134 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003135 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003136 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003137 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003138 }
Owen Taylor3473f882001-02-23 17:55:21 +00003139 len = 0;
3140
Daniel Veillardbb284f42002-10-16 18:02:47 +00003141 /*
3142 * Check that the first character is proper to start
3143 * a new name
3144 */
3145 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3146 ((c >= 0x41) && (c <= 0x5A)) ||
3147 (c == '_') || (c == ':'))) {
3148 int l;
3149 int first = CUR_SCHAR(cur, l);
3150
3151 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003152 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003153 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003154 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003155 }
3156 }
3157 cur++;
3158
Owen Taylor3473f882001-02-23 17:55:21 +00003159 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3160 buf[len++] = c;
3161 c = *cur++;
3162 }
3163 if (len >= max) {
3164 /*
3165 * Okay someone managed to make a huge name, so he's ready to pay
3166 * for the processing speed.
3167 */
3168 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003169
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003170 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003171 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003172 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003173 return(NULL);
3174 }
3175 memcpy(buffer, buf, len);
3176 while (c != 0) { /* tested bigname2.xml */
3177 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003178 xmlChar *tmp;
3179
Owen Taylor3473f882001-02-23 17:55:21 +00003180 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003181 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003182 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003183 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003184 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003185 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003186 return(NULL);
3187 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003188 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003189 }
3190 buffer[len++] = c;
3191 c = *cur++;
3192 }
3193 buffer[len] = 0;
3194 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003195
Owen Taylor3473f882001-02-23 17:55:21 +00003196 if (buffer == NULL)
3197 ret = xmlStrndup(buf, len);
3198 else {
3199 ret = buffer;
3200 }
3201 }
3202
3203 return(ret);
3204}
3205
3206/************************************************************************
3207 * *
3208 * The parser itself *
3209 * Relates to http://www.w3.org/TR/REC-xml *
3210 * *
3211 ************************************************************************/
3212
Daniel Veillard34e3f642008-07-29 09:02:27 +00003213/************************************************************************
3214 * *
3215 * Routines to parse Name, NCName and NmToken *
3216 * *
3217 ************************************************************************/
#ifdef DEBUG
/*
 * Debug-only call counters. The name and NCName parsers increment their
 * own counter on entry, the "Complex" ones count the slow paths.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3226
Daniel Veillard34e3f642008-07-29 09:02:27 +00003227/*
3228 * The two following functions are related to the change of accepted
3229 * characters for Name and NmToken in the Revision 5 of XML-1.0
3230 * They correspond to the modified production [4] and the new production [4a]
3231 * changes in that revision. Also note that the macros used for the
3232 * productions Letter, Digit, CombiningChar and Extender are not needed
3233 * anymore.
3234 * We still keep compatibility to pre-revision5 parsing semantic if the
3235 * new XML_PARSE_OLD10 option is given to the parser.
3236 */
3237static int
3238xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3240 /*
3241 * Use the new checks of production [4] [4a] amd [5] of the
3242 * Update 5 of XML-1.0
3243 */
3244 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3245 (((c >= 'a') && (c <= 'z')) ||
3246 ((c >= 'A') && (c <= 'Z')) ||
3247 (c == '_') || (c == ':') ||
3248 ((c >= 0xC0) && (c <= 0xD6)) ||
3249 ((c >= 0xD8) && (c <= 0xF6)) ||
3250 ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 ((c >= 0x370) && (c <= 0x37D)) ||
3252 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 ((c >= 0x200C) && (c <= 0x200D)) ||
3254 ((c >= 0x2070) && (c <= 0x218F)) ||
3255 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 ((c >= 0x10000) && (c <= 0xEFFFF))))
3260 return(1);
3261 } else {
3262 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3263 return(1);
3264 }
3265 return(0);
3266}
3267
3268static int
3269xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3270 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3271 /*
3272 * Use the new checks of production [4] [4a] amd [5] of the
3273 * Update 5 of XML-1.0
3274 */
3275 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3276 (((c >= 'a') && (c <= 'z')) ||
3277 ((c >= 'A') && (c <= 'Z')) ||
3278 ((c >= '0') && (c <= '9')) || /* !start */
3279 (c == '_') || (c == ':') ||
3280 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3281 ((c >= 0xC0) && (c <= 0xD6)) ||
3282 ((c >= 0xD8) && (c <= 0xF6)) ||
3283 ((c >= 0xF8) && (c <= 0x2FF)) ||
3284 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3285 ((c >= 0x370) && (c <= 0x37D)) ||
3286 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3287 ((c >= 0x200C) && (c <= 0x200D)) ||
3288 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3289 ((c >= 0x2070) && (c <= 0x218F)) ||
3290 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3291 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3292 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3293 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3294 ((c >= 0x10000) && (c <= 0xEFFFF))))
3295 return(1);
3296 } else {
3297 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003299 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003300 (IS_COMBINING(c)) ||
3301 (IS_EXTENDER(c)))
3302 return(1);
3303 }
3304 return(0);
3305}
3306
Daniel Veillarde57ec792003-09-10 10:50:59 +00003307static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003308 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003309
Daniel Veillard34e3f642008-07-29 09:02:27 +00003310static const xmlChar *
3311xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3312 int len = 0, l;
3313 int c;
3314 int count = 0;
3315
Daniel Veillardc6561462009-03-25 10:22:31 +00003316#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003317 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003318#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003319
3320 /*
3321 * Handler for more complex cases
3322 */
3323 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003324 if (ctxt->instate == XML_PARSER_EOF)
3325 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003326 c = CUR_CHAR(l);
3327 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3328 /*
3329 * Use the new checks of production [4] [4a] amd [5] of the
3330 * Update 5 of XML-1.0
3331 */
3332 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3333 (!(((c >= 'a') && (c <= 'z')) ||
3334 ((c >= 'A') && (c <= 'Z')) ||
3335 (c == '_') || (c == ':') ||
3336 ((c >= 0xC0) && (c <= 0xD6)) ||
3337 ((c >= 0xD8) && (c <= 0xF6)) ||
3338 ((c >= 0xF8) && (c <= 0x2FF)) ||
3339 ((c >= 0x370) && (c <= 0x37D)) ||
3340 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3341 ((c >= 0x200C) && (c <= 0x200D)) ||
3342 ((c >= 0x2070) && (c <= 0x218F)) ||
3343 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3344 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3345 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3346 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3347 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3348 return(NULL);
3349 }
3350 len += l;
3351 NEXTL(l);
3352 c = CUR_CHAR(l);
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3354 (((c >= 'a') && (c <= 'z')) ||
3355 ((c >= 'A') && (c <= 'Z')) ||
3356 ((c >= '0') && (c <= '9')) || /* !start */
3357 (c == '_') || (c == ':') ||
3358 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3359 ((c >= 0xC0) && (c <= 0xD6)) ||
3360 ((c >= 0xD8) && (c <= 0xF6)) ||
3361 ((c >= 0xF8) && (c <= 0x2FF)) ||
3362 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3363 ((c >= 0x370) && (c <= 0x37D)) ||
3364 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3365 ((c >= 0x200C) && (c <= 0x200D)) ||
3366 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3367 ((c >= 0x2070) && (c <= 0x218F)) ||
3368 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3369 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3370 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3371 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3372 ((c >= 0x10000) && (c <= 0xEFFFF))
3373 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003374 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003375 count = 0;
3376 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003377 if (ctxt->instate == XML_PARSER_EOF)
3378 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003379 }
3380 len += l;
3381 NEXTL(l);
3382 c = CUR_CHAR(l);
3383 }
3384 } else {
3385 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3386 (!IS_LETTER(c) && (c != '_') &&
3387 (c != ':'))) {
3388 return(NULL);
3389 }
3390 len += l;
3391 NEXTL(l);
3392 c = CUR_CHAR(l);
3393
3394 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3395 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3396 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003397 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003398 (IS_COMBINING(c)) ||
3399 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003400 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401 count = 0;
3402 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003403 if (ctxt->instate == XML_PARSER_EOF)
3404 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 }
3406 len += l;
3407 NEXTL(l);
3408 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003409 if (c == 0) {
3410 count = 0;
3411 GROW;
3412 if (ctxt->instate == XML_PARSER_EOF)
3413 return(NULL);
3414 c = CUR_CHAR(l);
3415 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003416 }
3417 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003418 if ((len > XML_MAX_NAME_LENGTH) &&
3419 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3420 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3421 return(NULL);
3422 }
Brian C. Young03657e72017-04-03 12:21:57 -07003423 if (ctxt->input->cur > ctxt->input->base && (*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) {
3424 if (ctxt->input->base > ctxt->input->cur - (len + 1)) {
3425 return(NULL);
3426 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
Brian C. Young03657e72017-04-03 12:21:57 -07003428 }
3429 if (ctxt->input->base > ctxt->input->cur - len) {
3430 return(NULL);
3431 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3433}
3434
Owen Taylor3473f882001-02-23 17:55:21 +00003435/**
3436 * xmlParseName:
3437 * @ctxt: an XML parser context
3438 *
3439 * parse an XML name.
3440 *
3441 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3442 * CombiningChar | Extender
3443 *
3444 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3445 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003446 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003447 *
3448 * Returns the Name parsed or NULL
3449 */
3450
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003451const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003452xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003453 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003454 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003455 int count = 0;
3456
3457 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003458
Daniel Veillardc6561462009-03-25 10:22:31 +00003459#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003460 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003461#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003462
Daniel Veillard48b2f892001-02-25 16:11:03 +00003463 /*
3464 * Accelerator for simple ASCII names
3465 */
3466 in = ctxt->input->cur;
3467 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3468 ((*in >= 0x41) && (*in <= 0x5A)) ||
3469 (*in == '_') || (*in == ':')) {
3470 in++;
3471 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3472 ((*in >= 0x41) && (*in <= 0x5A)) ||
3473 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003474 (*in == '_') || (*in == '-') ||
3475 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003476 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003477 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003478 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003479 if ((count > XML_MAX_NAME_LENGTH) &&
3480 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3481 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3482 return(NULL);
3483 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003484 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003485 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003486 ctxt->nbChars += count;
3487 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003488 if (ret == NULL)
3489 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003490 return(ret);
3491 }
3492 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003493 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003494 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003495}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003496
Daniel Veillard34e3f642008-07-29 09:02:27 +00003497static const xmlChar *
3498xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3499 int len = 0, l;
3500 int c;
3501 int count = 0;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003502 size_t startPosition = 0;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003503
Daniel Veillardc6561462009-03-25 10:22:31 +00003504#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003505 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003506#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003507
3508 /*
3509 * Handler for more complex cases
3510 */
3511 GROW;
Pranjal Jumde45752d22016-03-03 11:50:34 -08003512 startPosition = CUR_PTR - BASE_PTR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003513 c = CUR_CHAR(l);
3514 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3515 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3516 return(NULL);
3517 }
3518
3519 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3520 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003521 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003522 if ((len > XML_MAX_NAME_LENGTH) &&
3523 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3524 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3525 return(NULL);
3526 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003527 count = 0;
3528 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003529 if (ctxt->instate == XML_PARSER_EOF)
3530 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003531 }
3532 len += l;
3533 NEXTL(l);
3534 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003535 if (c == 0) {
3536 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003537 /*
3538 * when shrinking to extend the buffer we really need to preserve
3539 * the part of the name we already parsed. Hence rolling back
3540 * by current lenght.
3541 */
3542 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003543 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003544 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003545 if (ctxt->instate == XML_PARSER_EOF)
3546 return(NULL);
3547 c = CUR_CHAR(l);
3548 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003549 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003550 if ((len > XML_MAX_NAME_LENGTH) &&
3551 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3552 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3553 return(NULL);
3554 }
Pranjal Jumde45752d22016-03-03 11:50:34 -08003555 return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003556}
3557
3558/**
3559 * xmlParseNCName:
3560 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003561 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003562 *
3563 * parse an XML name.
3564 *
3565 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3566 * CombiningChar | Extender
3567 *
3568 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3569 *
3570 * Returns the Name parsed or NULL
3571 */
3572
3573static const xmlChar *
3574xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003575 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003576 const xmlChar *ret;
3577 int count = 0;
3578
Daniel Veillardc6561462009-03-25 10:22:31 +00003579#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003580 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003581#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003582
3583 /*
3584 * Accelerator for simple ASCII names
3585 */
3586 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003587 e = ctxt->input->end;
3588 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3589 ((*in >= 0x41) && (*in <= 0x5A)) ||
3590 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003591 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003592 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3593 ((*in >= 0x41) && (*in <= 0x5A)) ||
3594 ((*in >= 0x30) && (*in <= 0x39)) ||
3595 (*in == '_') || (*in == '-') ||
3596 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003597 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003598 if (in >= e)
3599 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003600 if ((*in > 0) && (*in < 0x80)) {
3601 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003602 if ((count > XML_MAX_NAME_LENGTH) &&
3603 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3604 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3605 return(NULL);
3606 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003607 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3608 ctxt->input->cur = in;
3609 ctxt->nbChars += count;
3610 ctxt->input->col += count;
3611 if (ret == NULL) {
3612 xmlErrMemory(ctxt, NULL);
3613 }
3614 return(ret);
3615 }
3616 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003617complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003618 return(xmlParseNCNameComplex(ctxt));
3619}
3620
Daniel Veillard46de64e2002-05-29 08:21:33 +00003621/**
3622 * xmlParseNameAndCompare:
3623 * @ctxt: an XML parser context
3624 *
3625 * parse an XML name and compares for match
3626 * (specialized for endtag parsing)
3627 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003628 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3629 * and the name for mismatch
3630 */
3631
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003632static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003633xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003634 register const xmlChar *cmp = other;
3635 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003636 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003637
3638 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003639 if (ctxt->instate == XML_PARSER_EOF)
3640 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003641
Daniel Veillard46de64e2002-05-29 08:21:33 +00003642 in = ctxt->input->cur;
3643 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003644 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003645 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003646 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003647 }
William M. Brack76e95df2003-10-18 16:20:14 +00003648 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003649 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003650 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003651 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003652 }
3653 /* failure (or end of input buffer), check with full function */
3654 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003655 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003656 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003657 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003658 }
3659 return ret;
3660}
3661
Owen Taylor3473f882001-02-23 17:55:21 +00003662/**
3663 * xmlParseStringName:
3664 * @ctxt: an XML parser context
3665 * @str: a pointer to the string pointer (IN/OUT)
3666 *
3667 * parse an XML name.
3668 *
3669 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3670 * CombiningChar | Extender
3671 *
3672 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3673 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003674 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003675 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003676 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003677 * is updated to the current location in the string.
3678 */
3679
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003680static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003681xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3682 xmlChar buf[XML_MAX_NAMELEN + 5];
3683 const xmlChar *cur = *str;
3684 int len = 0, l;
3685 int c;
3686
Daniel Veillardc6561462009-03-25 10:22:31 +00003687#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003688 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003689#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003690
Owen Taylor3473f882001-02-23 17:55:21 +00003691 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003692 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003693 return(NULL);
3694 }
3695
Daniel Veillard34e3f642008-07-29 09:02:27 +00003696 COPY_BUF(l,buf,len,c);
3697 cur += l;
3698 c = CUR_SCHAR(cur, l);
3699 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003700 COPY_BUF(l,buf,len,c);
3701 cur += l;
3702 c = CUR_SCHAR(cur, l);
3703 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3704 /*
3705 * Okay someone managed to make a huge name, so he's ready to pay
3706 * for the processing speed.
3707 */
3708 xmlChar *buffer;
3709 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003710
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003711 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003712 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003713 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003714 return(NULL);
3715 }
3716 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003717 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003718 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003719 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003720
3721 if ((len > XML_MAX_NAME_LENGTH) &&
3722 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3723 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3724 xmlFree(buffer);
3725 return(NULL);
3726 }
Owen Taylor3473f882001-02-23 17:55:21 +00003727 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003728 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003729 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003730 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003731 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003732 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003733 return(NULL);
3734 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003735 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
3737 COPY_BUF(l,buffer,len,c);
3738 cur += l;
3739 c = CUR_SCHAR(cur, l);
3740 }
3741 buffer[len] = 0;
3742 *str = cur;
3743 return(buffer);
3744 }
3745 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003746 if ((len > XML_MAX_NAME_LENGTH) &&
3747 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3748 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3749 return(NULL);
3750 }
Owen Taylor3473f882001-02-23 17:55:21 +00003751 *str = cur;
3752 return(xmlStrndup(buf, len));
3753}
3754
3755/**
3756 * xmlParseNmtoken:
3757 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003758 *
Owen Taylor3473f882001-02-23 17:55:21 +00003759 * parse an XML Nmtoken.
3760 *
3761 * [7] Nmtoken ::= (NameChar)+
3762 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003763 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003764 *
3765 * Returns the Nmtoken parsed or NULL
3766 */
3767
3768xmlChar *
3769xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3770 xmlChar buf[XML_MAX_NAMELEN + 5];
3771 int len = 0, l;
3772 int c;
3773 int count = 0;
3774
Daniel Veillardc6561462009-03-25 10:22:31 +00003775#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003776 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003777#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003778
Owen Taylor3473f882001-02-23 17:55:21 +00003779 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003780 if (ctxt->instate == XML_PARSER_EOF)
3781 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 c = CUR_CHAR(l);
3783
Daniel Veillard34e3f642008-07-29 09:02:27 +00003784 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003785 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003786 count = 0;
3787 GROW;
3788 }
3789 COPY_BUF(l,buf,len,c);
3790 NEXTL(l);
3791 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003792 if (c == 0) {
3793 count = 0;
3794 GROW;
3795 if (ctxt->instate == XML_PARSER_EOF)
3796 return(NULL);
3797 c = CUR_CHAR(l);
3798 }
Owen Taylor3473f882001-02-23 17:55:21 +00003799 if (len >= XML_MAX_NAMELEN) {
3800 /*
3801 * Okay someone managed to make a huge token, so he's ready to pay
3802 * for the processing speed.
3803 */
3804 xmlChar *buffer;
3805 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003806
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003807 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003808 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003810 return(NULL);
3811 }
3812 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003813 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003814 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003815 count = 0;
3816 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003817 if (ctxt->instate == XML_PARSER_EOF) {
3818 xmlFree(buffer);
3819 return(NULL);
3820 }
Owen Taylor3473f882001-02-23 17:55:21 +00003821 }
3822 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003823 xmlChar *tmp;
3824
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003825 if ((max > XML_MAX_NAME_LENGTH) &&
3826 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3827 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3828 xmlFree(buffer);
3829 return(NULL);
3830 }
Owen Taylor3473f882001-02-23 17:55:21 +00003831 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003832 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003833 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003834 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003835 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003836 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003837 return(NULL);
3838 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003839 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003840 }
3841 COPY_BUF(l,buffer,len,c);
3842 NEXTL(l);
3843 c = CUR_CHAR(l);
3844 }
3845 buffer[len] = 0;
3846 return(buffer);
3847 }
3848 }
3849 if (len == 0)
3850 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003851 if ((len > XML_MAX_NAME_LENGTH) &&
3852 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3853 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3854 return(NULL);
3855 }
Owen Taylor3473f882001-02-23 17:55:21 +00003856 return(xmlStrndup(buf, len));
3857}
3858
3859/**
3860 * xmlParseEntityValue:
3861 * @ctxt: an XML parser context
3862 * @orig: if non-NULL store a copy of the original entity value
3863 *
3864 * parse a value for ENTITY declarations
3865 *
3866 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3867 * "'" ([^%&'] | PEReference | Reference)* "'"
3868 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003869 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003870 */
3871
3872xmlChar *
3873xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3874 xmlChar *buf = NULL;
3875 int len = 0;
3876 int size = XML_PARSER_BUFFER_SIZE;
3877 int c, l;
3878 xmlChar stop;
3879 xmlChar *ret = NULL;
3880 const xmlChar *cur = NULL;
3881 xmlParserInputPtr input;
3882
3883 if (RAW == '"') stop = '"';
3884 else if (RAW == '\'') stop = '\'';
3885 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003886 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003887 return(NULL);
3888 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003889 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003890 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003891 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003892 return(NULL);
3893 }
3894
3895 /*
3896 * The content of the entity definition is copied in a buffer.
3897 */
3898
3899 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3900 input = ctxt->input;
3901 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003902 if (ctxt->instate == XML_PARSER_EOF) {
3903 xmlFree(buf);
3904 return(NULL);
3905 }
Owen Taylor3473f882001-02-23 17:55:21 +00003906 NEXT;
3907 c = CUR_CHAR(l);
3908 /*
3909 * NOTE: 4.4.5 Included in Literal
3910 * When a parameter entity reference appears in a literal entity
3911 * value, ... a single or double quote character in the replacement
3912 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003913 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003914 * In practice it means we stop the loop only when back at parsing
3915 * the initial entity and the quote is found
3916 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003917 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3918 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003919 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003920 xmlChar *tmp;
3921
Owen Taylor3473f882001-02-23 17:55:21 +00003922 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003923 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3924 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003925 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003926 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003927 return(NULL);
3928 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003929 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003930 }
3931 COPY_BUF(l,buf,len,c);
3932 NEXTL(l);
3933 /*
3934 * Pop-up of finished entities.
3935 */
3936 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3937 xmlPopInput(ctxt);
3938
3939 GROW;
3940 c = CUR_CHAR(l);
3941 if (c == 0) {
3942 GROW;
3943 c = CUR_CHAR(l);
3944 }
3945 }
3946 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003947 if (ctxt->instate == XML_PARSER_EOF) {
3948 xmlFree(buf);
3949 return(NULL);
3950 }
Owen Taylor3473f882001-02-23 17:55:21 +00003951
3952 /*
3953 * Raise problem w.r.t. '&' and '%' being used in non-entities
3954 * reference constructs. Note Charref will be handled in
3955 * xmlStringDecodeEntities()
3956 */
3957 cur = buf;
3958 while (*cur != 0) { /* non input consuming */
3959 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3960 xmlChar *name;
3961 xmlChar tmp = *cur;
3962
3963 cur++;
3964 name = xmlParseStringName(ctxt, &cur);
3965 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003966 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003967 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003968 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003969 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003970 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3971 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003972 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003973 }
3974 if (name != NULL)
3975 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003976 if (*cur == 0)
3977 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003978 }
3979 cur++;
3980 }
3981
3982 /*
3983 * Then PEReference entities are substituted.
3984 */
3985 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003986 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003987 xmlFree(buf);
3988 } else {
3989 NEXT;
3990 /*
3991 * NOTE: 4.4.7 Bypassed
3992 * When a general entity reference appears in the EntityValue in
3993 * an entity declaration, it is bypassed and left as is.
3994 * so XML_SUBSTITUTE_REF is not set here.
3995 */
Peter Simons8f30bdf2016-04-15 11:56:55 +02003996 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003997 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3998 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003999 --ctxt->depth;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004000 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00004001 *orig = buf;
4002 else
4003 xmlFree(buf);
4004 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004005
Owen Taylor3473f882001-02-23 17:55:21 +00004006 return(ret);
4007}
4008
4009/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00004010 * xmlParseAttValueComplex:
4011 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00004012 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004013 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00004014 *
4015 * parse a value for an attribute, this is the fallback function
4016 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004017 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00004018 *
4019 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4020 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00004021static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004022xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00004023 xmlChar limit = 0;
4024 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004025 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004026 size_t len = 0;
4027 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004028 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004029 xmlChar *current = NULL;
4030 xmlEntityPtr ent;
4031
Owen Taylor3473f882001-02-23 17:55:21 +00004032 if (NXT(0) == '"') {
4033 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4034 limit = '"';
4035 NEXT;
4036 } else if (NXT(0) == '\'') {
4037 limit = '\'';
4038 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4039 NEXT;
4040 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004041 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004042 return(NULL);
4043 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00004044
Owen Taylor3473f882001-02-23 17:55:21 +00004045 /*
4046 * allocate a translation buffer.
4047 */
4048 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004049 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004050 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00004051
4052 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004053 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00004054 */
4055 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004056 while (((NXT(0) != limit) && /* checked */
4057 (IS_CHAR(c)) && (c != '<')) &&
4058 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08004059 /*
4060 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4061 * special option is given
4062 */
4063 if ((len > XML_MAX_TEXT_LENGTH) &&
4064 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4065 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004066 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08004067 goto mem_error;
4068 }
Owen Taylor3473f882001-02-23 17:55:21 +00004069 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00004070 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00004071 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004072 if (NXT(1) == '#') {
4073 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004074
Owen Taylor3473f882001-02-23 17:55:21 +00004075 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00004076 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004077 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004078 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004079 }
4080 buf[len++] = '&';
4081 } else {
4082 /*
4083 * The reparsing will be done in xmlStringGetNodeList()
4084 * called by the attribute() function in SAX.c
4085 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004086 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004087 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004088 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004089 buf[len++] = '&';
4090 buf[len++] = '#';
4091 buf[len++] = '3';
4092 buf[len++] = '8';
4093 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004094 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004095 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004096 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004097 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004098 }
Owen Taylor3473f882001-02-23 17:55:21 +00004099 len += xmlCopyChar(0, &buf[len], val);
4100 }
4101 } else {
4102 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004103 ctxt->nbentities++;
4104 if (ent != NULL)
4105 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004106 if ((ent != NULL) &&
4107 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004108 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004109 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004110 }
4111 if ((ctxt->replaceEntities == 0) &&
4112 (ent->content[0] == '&')) {
4113 buf[len++] = '&';
4114 buf[len++] = '#';
4115 buf[len++] = '3';
4116 buf[len++] = '8';
4117 buf[len++] = ';';
4118 } else {
4119 buf[len++] = ent->content[0];
4120 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004121 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004122 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004123 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02004124 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004125 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004126 XML_SUBSTITUTE_REF,
4127 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004128 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004129 if (rep != NULL) {
4130 current = rep;
4131 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004132 if ((*current == 0xD) || (*current == 0xA) ||
4133 (*current == 0x9)) {
4134 buf[len++] = 0x20;
4135 current++;
4136 } else
4137 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004138 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004139 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004140 }
4141 }
4142 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004143 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004144 }
4145 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004146 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004147 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004148 }
Owen Taylor3473f882001-02-23 17:55:21 +00004149 if (ent->content != NULL)
4150 buf[len++] = ent->content[0];
4151 }
4152 } else if (ent != NULL) {
4153 int i = xmlStrlen(ent->name);
4154 const xmlChar *cur = ent->name;
4155
4156 /*
4157 * This may look absurd but is needed to detect
4158 * entities problems
4159 */
4160 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004161 (ent->content != NULL) && (ent->checked == 0)) {
4162 unsigned long oldnbent = ctxt->nbentities;
4163
Peter Simons8f30bdf2016-04-15 11:56:55 +02004164 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004165 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004166 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004167 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004168
Daniel Veillardcff25462013-03-11 15:57:55 +08004169 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004170 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004171 if (xmlStrchr(rep, '<'))
4172 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004173 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004174 rep = NULL;
4175 }
Owen Taylor3473f882001-02-23 17:55:21 +00004176 }
4177
4178 /*
4179 * Just output the reference
4180 */
4181 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004182 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004183 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004184 }
4185 for (;i > 0;i--)
4186 buf[len++] = *cur++;
4187 buf[len++] = ';';
4188 }
4189 }
4190 } else {
4191 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004192 if ((len != 0) || (!normalize)) {
4193 if ((!normalize) || (!in_space)) {
4194 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004195 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004196 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004197 }
4198 }
4199 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004200 }
4201 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004202 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004203 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004204 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004205 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004206 }
4207 }
4208 NEXTL(l);
4209 }
4210 GROW;
4211 c = CUR_CHAR(l);
4212 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004213 if (ctxt->instate == XML_PARSER_EOF)
4214 goto error;
4215
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004216 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004217 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004218 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004219 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004220 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004221 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004222 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004223 if ((c != 0) && (!IS_CHAR(c))) {
4224 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4225 "invalid character in attribute value\n");
4226 } else {
4227 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4228 "AttValue: ' expected\n");
4229 }
Owen Taylor3473f882001-02-23 17:55:21 +00004230 } else
4231 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004232
4233 /*
4234 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004235 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004236 */
4237 if (len >= INT_MAX) {
4238 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004239 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004240 goto mem_error;
4241 }
4242
4243 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004244 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004245
4246mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004247 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004248error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004249 if (buf != NULL)
4250 xmlFree(buf);
4251 if (rep != NULL)
4252 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004253 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004254}
4255
4256/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004257 * xmlParseAttValue:
4258 * @ctxt: an XML parser context
4259 *
4260 * parse a value for an attribute
4261 * Note: the parser won't do substitution of entities here, this
4262 * will be handled later in xmlStringGetNodeList
4263 *
4264 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4265 * "'" ([^<&'] | Reference)* "'"
4266 *
4267 * 3.3.3 Attribute-Value Normalization:
4268 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004269 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004270 * - a character reference is processed by appending the referenced
4271 * character to the attribute value
4272 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004273 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004274 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4275 * appending #x20 to the normalized value, except that only a single
4276 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004277 * parsed entity or the literal entity value of an internal parsed entity
4278 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004279 * If the declared value is not CDATA, then the XML processor must further
4280 * process the normalized attribute value by discarding any leading and
4281 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004282 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004283 * All attributes for which no declaration has been read should be treated
4284 * by a non-validating parser as if declared CDATA.
4285 *
4286 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4287 */
4288
4289
4290xmlChar *
4291xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004292 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004293 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004294}
4295
4296/**
Owen Taylor3473f882001-02-23 17:55:21 +00004297 * xmlParseSystemLiteral:
4298 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004299 *
Owen Taylor3473f882001-02-23 17:55:21 +00004300 * parse an XML Literal
4301 *
4302 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4303 *
4304 * Returns the SystemLiteral parsed or NULL
4305 */
4306
4307xmlChar *
4308xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4309 xmlChar *buf = NULL;
4310 int len = 0;
4311 int size = XML_PARSER_BUFFER_SIZE;
4312 int cur, l;
4313 xmlChar stop;
4314 int state = ctxt->instate;
4315 int count = 0;
4316
4317 SHRINK;
4318 if (RAW == '"') {
4319 NEXT;
4320 stop = '"';
4321 } else if (RAW == '\'') {
4322 NEXT;
4323 stop = '\'';
4324 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004325 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004326 return(NULL);
4327 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004328
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004329 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004330 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 return(NULL);
4333 }
4334 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4335 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004336 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004337 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004338 xmlChar *tmp;
4339
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004340 if ((size > XML_MAX_NAME_LENGTH) &&
4341 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4342 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4343 xmlFree(buf);
4344 ctxt->instate = (xmlParserInputState) state;
4345 return(NULL);
4346 }
Owen Taylor3473f882001-02-23 17:55:21 +00004347 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004348 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4349 if (tmp == NULL) {
4350 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004351 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004352 ctxt->instate = (xmlParserInputState) state;
4353 return(NULL);
4354 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004355 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004356 }
4357 count++;
4358 if (count > 50) {
4359 GROW;
4360 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004361 if (ctxt->instate == XML_PARSER_EOF) {
4362 xmlFree(buf);
4363 return(NULL);
4364 }
Owen Taylor3473f882001-02-23 17:55:21 +00004365 }
4366 COPY_BUF(l,buf,len,cur);
4367 NEXTL(l);
4368 cur = CUR_CHAR(l);
4369 if (cur == 0) {
4370 GROW;
4371 SHRINK;
4372 cur = CUR_CHAR(l);
4373 }
4374 }
4375 buf[len] = 0;
4376 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004377 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004378 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004379 } else {
4380 NEXT;
4381 }
4382 return(buf);
4383}
4384
4385/**
4386 * xmlParsePubidLiteral:
4387 * @ctxt: an XML parser context
4388 *
4389 * parse an XML public literal
4390 *
4391 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4392 *
4393 * Returns the PubidLiteral parsed or NULL.
4394 */
4395
4396xmlChar *
4397xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4398 xmlChar *buf = NULL;
4399 int len = 0;
4400 int size = XML_PARSER_BUFFER_SIZE;
4401 xmlChar cur;
4402 xmlChar stop;
4403 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004404 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004405
4406 SHRINK;
4407 if (RAW == '"') {
4408 NEXT;
4409 stop = '"';
4410 } else if (RAW == '\'') {
4411 NEXT;
4412 stop = '\'';
4413 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004414 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004415 return(NULL);
4416 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004417 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004418 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004419 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004420 return(NULL);
4421 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004422 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004423 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004424 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004425 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004426 xmlChar *tmp;
4427
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004428 if ((size > XML_MAX_NAME_LENGTH) &&
4429 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4430 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4431 xmlFree(buf);
4432 return(NULL);
4433 }
Owen Taylor3473f882001-02-23 17:55:21 +00004434 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004435 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4436 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004437 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004438 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 return(NULL);
4440 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004441 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004442 }
4443 buf[len++] = cur;
4444 count++;
4445 if (count > 50) {
4446 GROW;
4447 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004448 if (ctxt->instate == XML_PARSER_EOF) {
4449 xmlFree(buf);
4450 return(NULL);
4451 }
Owen Taylor3473f882001-02-23 17:55:21 +00004452 }
4453 NEXT;
4454 cur = CUR;
4455 if (cur == 0) {
4456 GROW;
4457 SHRINK;
4458 cur = CUR;
4459 }
4460 }
4461 buf[len] = 0;
4462 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004463 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004464 } else {
4465 NEXT;
4466 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004467 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004468 return(buf);
4469}
4470
Daniel Veillard8ed10722009-08-20 19:17:36 +02004471static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004472
/*
 * Lookup table driving the inner loop of the character data fast path.
 * Entry [c] is c itself when byte c may be skipped over without further
 * inspection, and 0 when the scan has to stop: control characters other
 * than TAB (this includes CR and LF, which are handled separately),
 * '&', '<', ']' and every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x3F: '&' (0x26) and '<' (0x3C) stop the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x5F: ']' (0x5D) stops the scan */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x7F: everything is accepted */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF (non-ascii): left to implicit zero initialization */
};
4510
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is 0 a fast path scans the input buffer in place and hands
 * the bytes straight to the SAX callbacks without copying them. As soon
 * as it meets a byte it does not handle, or when @cdata is set, the work
 * is handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position at which the fallback routine has to resume counting;
     * refreshed each time the main scan below reaches its SAX dispatch.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a leading run made only of spaces and line feeds. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces/line feeds were seen before the markup:
                 * report them (if any) and stop right on the '<'.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /* Distinct handlers: let areBlanks() choose. */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /*
             * Main scan: advance over every byte flagged in
             * test_char_data, counting columns in a local variable.
             */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                if ((in[1] == ']') && (in[2] == '>')) {
                    /*
                     * "]]>" outside of a CDATA section is an error.
                     * NOTE(review): the bytes scanned so far (from the
                     * old input->cur up to in) are not passed to any
                     * SAX callback on this path.
                     */
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* A lone ']' is ordinary character data. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver what has been scanned since input->cur. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The chunk starts with a blank and the handlers
                     * differ: let areBlanks() choose the callback.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF: leave input->cur on the LF, so the CR is
                     * skipped and the LF becomes the first byte of the
                     * next chunk.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* A CR not followed by LF is not handled here. */
                in--;
            }
            /* Markup or a reference ends the character data. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* Reload the cursor after the SHRINK/GROW just above. */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Slow path: put back the saved position and let the fallback
     * routine take over from input->cur.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4668
Daniel Veillard01c13b52002-12-10 15:19:08 +00004669/**
4670 * xmlParseCharDataComplex:
4671 * @ctxt: an XML parser context
4672 * @cdata: int indicating whether we are within a CDATA section
4673 *
4674 * parse a CharData section.this is the fallback function
4675 * of xmlParseCharData() when the parsing requires handling
4676 * of non-ASCII characters.
4677 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004678static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004679xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004680 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4681 int nbchar = 0;
4682 int cur, l;
4683 int count = 0;
4684
4685 SHRINK;
4686 GROW;
4687 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004688 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004689 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004690 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004691 if ((cur == ']') && (NXT(1) == ']') &&
4692 (NXT(2) == '>')) {
4693 if (cdata) break;
4694 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004695 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004696 }
4697 }
4698 COPY_BUF(l,buf,nbchar,cur);
4699 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004700 buf[nbchar] = 0;
4701
Owen Taylor3473f882001-02-23 17:55:21 +00004702 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004703 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004704 */
4705 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004706 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004707 if (ctxt->sax->ignorableWhitespace != NULL)
4708 ctxt->sax->ignorableWhitespace(ctxt->userData,
4709 buf, nbchar);
4710 } else {
4711 if (ctxt->sax->characters != NULL)
4712 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004713 if ((ctxt->sax->characters !=
4714 ctxt->sax->ignorableWhitespace) &&
4715 (*ctxt->space == -1))
4716 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004717 }
4718 }
4719 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004720 /* something really bad happened in the SAX callback */
4721 if (ctxt->instate != XML_PARSER_CONTENT)
4722 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
4724 count++;
4725 if (count > 50) {
4726 GROW;
4727 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004728 if (ctxt->instate == XML_PARSER_EOF)
4729 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004730 }
4731 NEXTL(l);
4732 cur = CUR_CHAR(l);
4733 }
4734 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004735 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004736 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004737 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004738 */
4739 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004740 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004741 if (ctxt->sax->ignorableWhitespace != NULL)
4742 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4743 } else {
4744 if (ctxt->sax->characters != NULL)
4745 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004746 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4747 (*ctxt->space == -1))
4748 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004749 }
4750 }
4751 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004752 if ((cur != 0) && (!IS_CHAR(cur))) {
4753 /* Generate the error and skip the offending character */
4754 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4755 "PCDATA invalid Char value %d\n",
4756 cur);
4757 NEXTL(l);
4758 }
Owen Taylor3473f882001-02-23 17:55:21 +00004759}
4760
4761/**
4762 * xmlParseExternalID:
4763 * @ctxt: an XML parser context
4764 * @publicID: a xmlChar** receiving PubidLiteral
4765 * @strict: indicate whether we should restrict parsing to only
4766 * production [75], see NOTE below
4767 *
4768 * Parse an External ID or a Public ID
4769 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004770 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004771 * 'PUBLIC' S PubidLiteral S SystemLiteral
4772 *
4773 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4774 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4775 *
4776 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4777 *
4778 * Returns the function returns SystemLiteral and in the second
4779 * case publicID receives PubidLiteral, is strict is off
4780 * it is possible to return NULL and have publicID set.
4781 */
4782
4783xmlChar *
4784xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4785 xmlChar *URI = NULL;
4786
4787 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004788
4789 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004790 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004791 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004792 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004793 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4794 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004795 }
4796 SKIP_BLANKS;
4797 URI = xmlParseSystemLiteral(ctxt);
4798 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004799 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004800 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004801 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004802 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004803 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004804 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004805 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004806 }
4807 SKIP_BLANKS;
4808 *publicID = xmlParsePubidLiteral(ctxt);
4809 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004810 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004811 }
4812 if (strict) {
4813 /*
4814 * We don't handle [83] so "S SystemLiteral" is required.
4815 */
William M. Brack76e95df2003-10-18 16:20:14 +00004816 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004817 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004818 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004819 }
4820 } else {
4821 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004822 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004823 * "S SystemLiteral" is not detected. From a purely parsing
4824 * point of view that's a nice mess.
4825 */
4826 const xmlChar *ptr;
4827 GROW;
4828
4829 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004830 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004831
William M. Brack76e95df2003-10-18 16:20:14 +00004832 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004833 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4834 }
4835 SKIP_BLANKS;
4836 URI = xmlParseSystemLiteral(ctxt);
4837 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004838 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004839 }
4840 }
4841 return(URI);
4842}
4843
4844/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004845 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004846 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004847 * @buf: the already parsed part of the buffer
4848 * @len: number of bytes filles in the buffer
4849 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004850 *
4851 * Skip an XML (SGML) comment <!-- .... -->
4852 * The spec says that "For compatibility, the string "--" (double-hyphen)
4853 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004854 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004855 *
4856 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4857 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004858static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004859xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4860 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004861 int q, ql;
4862 int r, rl;
4863 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004864 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004865 int inputid;
4866
4867 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004868
Owen Taylor3473f882001-02-23 17:55:21 +00004869 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004870 len = 0;
4871 size = XML_PARSER_BUFFER_SIZE;
4872 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4873 if (buf == NULL) {
4874 xmlErrMemory(ctxt, NULL);
4875 return;
4876 }
Owen Taylor3473f882001-02-23 17:55:21 +00004877 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004878 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004879 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004880 if (q == 0)
4881 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004882 if (!IS_CHAR(q)) {
4883 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4884 "xmlParseComment: invalid xmlChar value %d\n",
4885 q);
4886 xmlFree (buf);
4887 return;
4888 }
Owen Taylor3473f882001-02-23 17:55:21 +00004889 NEXTL(ql);
4890 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004891 if (r == 0)
4892 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004893 if (!IS_CHAR(r)) {
4894 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4895 "xmlParseComment: invalid xmlChar value %d\n",
4896 q);
4897 xmlFree (buf);
4898 return;
4899 }
Owen Taylor3473f882001-02-23 17:55:21 +00004900 NEXTL(rl);
4901 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004902 if (cur == 0)
4903 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004904 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004905 ((cur != '>') ||
4906 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004907 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004908 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004909 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004910 if ((len > XML_MAX_TEXT_LENGTH) &&
4911 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4912 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4913 "Comment too big found", NULL);
4914 xmlFree (buf);
4915 return;
4916 }
Owen Taylor3473f882001-02-23 17:55:21 +00004917 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004918 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004919 size_t new_size;
4920
4921 new_size = size * 2;
4922 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004923 if (new_buf == NULL) {
4924 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004925 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004926 return;
4927 }
William M. Bracka3215c72004-07-31 16:24:01 +00004928 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004929 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004930 }
4931 COPY_BUF(ql,buf,len,q);
4932 q = r;
4933 ql = rl;
4934 r = cur;
4935 rl = l;
4936
4937 count++;
4938 if (count > 50) {
4939 GROW;
4940 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004941 if (ctxt->instate == XML_PARSER_EOF) {
4942 xmlFree(buf);
4943 return;
4944 }
Owen Taylor3473f882001-02-23 17:55:21 +00004945 }
4946 NEXTL(l);
4947 cur = CUR_CHAR(l);
4948 if (cur == 0) {
4949 SHRINK;
4950 GROW;
4951 cur = CUR_CHAR(l);
4952 }
4953 }
4954 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004955 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004956 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004957 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004958 } else if (!IS_CHAR(cur)) {
4959 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4960 "xmlParseComment: invalid xmlChar value %d\n",
4961 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004962 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004963 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004964 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4965 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004966 }
4967 NEXT;
4968 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4969 (!ctxt->disableSAX))
4970 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004971 }
Daniel Veillardda629342007-08-01 07:49:06 +00004972 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004973 return;
4974not_terminated:
4975 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4976 "Comment not terminated\n", NULL);
4977 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004978 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004979}
Daniel Veillardda629342007-08-01 07:49:06 +00004980
Daniel Veillard4c778d82005-01-23 17:37:44 +00004981/**
4982 * xmlParseComment:
4983 * @ctxt: an XML parser context
4984 *
4985 * Skip an XML (SGML) comment <!-- .... -->
4986 * The spec says that "For compatibility, the string "--" (double-hyphen)
4987 * must not occur within comments. "
4988 *
4989 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4990 */
4991void
4992xmlParseComment(xmlParserCtxtPtr ctxt) {
4993 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004994 size_t size = XML_PARSER_BUFFER_SIZE;
4995 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004996 xmlParserInputState state;
4997 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004998 size_t nbchar = 0;
4999 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00005000 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005001
5002 /*
5003 * Check that there is a comment right here.
5004 */
5005 if ((RAW != '<') || (NXT(1) != '!') ||
5006 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005007 state = ctxt->instate;
5008 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00005009 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005010 SKIP(4);
5011 SHRINK;
5012 GROW;
5013
5014 /*
5015 * Accelerated common case where input don't need to be
5016 * modified before passing it to the handler.
5017 */
5018 in = ctxt->input->cur;
5019 do {
5020 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005021 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005022 ctxt->input->line++; ctxt->input->col = 1;
5023 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005024 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005025 }
5026get_more:
5027 ccol = ctxt->input->col;
5028 while (((*in > '-') && (*in <= 0x7F)) ||
5029 ((*in >= 0x20) && (*in < '-')) ||
5030 (*in == 0x09)) {
5031 in++;
5032 ccol++;
5033 }
5034 ctxt->input->col = ccol;
5035 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005036 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005037 ctxt->input->line++; ctxt->input->col = 1;
5038 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005039 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005040 goto get_more;
5041 }
5042 nbchar = in - ctxt->input->cur;
5043 /*
5044 * save current set of data
5045 */
5046 if (nbchar > 0) {
5047 if ((ctxt->sax != NULL) &&
5048 (ctxt->sax->comment != NULL)) {
5049 if (buf == NULL) {
5050 if ((*in == '-') && (in[1] == '-'))
5051 size = nbchar + 1;
5052 else
5053 size = XML_PARSER_BUFFER_SIZE + nbchar;
5054 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5055 if (buf == NULL) {
5056 xmlErrMemory(ctxt, NULL);
5057 ctxt->instate = state;
5058 return;
5059 }
5060 len = 0;
5061 } else if (len + nbchar + 1 >= size) {
5062 xmlChar *new_buf;
5063 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5064 new_buf = (xmlChar *) xmlRealloc(buf,
5065 size * sizeof(xmlChar));
5066 if (new_buf == NULL) {
5067 xmlFree (buf);
5068 xmlErrMemory(ctxt, NULL);
5069 ctxt->instate = state;
5070 return;
5071 }
5072 buf = new_buf;
5073 }
5074 memcpy(&buf[len], ctxt->input->cur, nbchar);
5075 len += nbchar;
5076 buf[len] = 0;
5077 }
5078 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08005079 if ((len > XML_MAX_TEXT_LENGTH) &&
5080 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5081 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5082 "Comment too big found", NULL);
5083 xmlFree (buf);
5084 return;
5085 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005086 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00005087 if (*in == 0xA) {
5088 in++;
5089 ctxt->input->line++; ctxt->input->col = 1;
5090 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005091 if (*in == 0xD) {
5092 in++;
5093 if (*in == 0xA) {
5094 ctxt->input->cur = in;
5095 in++;
5096 ctxt->input->line++; ctxt->input->col = 1;
5097 continue; /* while */
5098 }
5099 in--;
5100 }
5101 SHRINK;
5102 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005103 if (ctxt->instate == XML_PARSER_EOF) {
5104 xmlFree(buf);
5105 return;
5106 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005107 in = ctxt->input->cur;
5108 if (*in == '-') {
5109 if (in[1] == '-') {
5110 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005111 if (ctxt->input->id != inputid) {
5112 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5113 "comment doesn't start and stop in the same entity\n");
5114 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005115 SKIP(3);
5116 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5117 (!ctxt->disableSAX)) {
5118 if (buf != NULL)
5119 ctxt->sax->comment(ctxt->userData, buf);
5120 else
5121 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5122 }
5123 if (buf != NULL)
5124 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005125 if (ctxt->instate != XML_PARSER_EOF)
5126 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005127 return;
5128 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005129 if (buf != NULL) {
5130 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5131 "Double hyphen within comment: "
5132 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005133 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005134 } else
5135 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5136 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005137 in++;
5138 ctxt->input->col++;
5139 }
5140 in++;
5141 ctxt->input->col++;
5142 goto get_more;
5143 }
5144 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5145 xmlParseCommentComplex(ctxt, buf, len, size);
5146 ctxt->instate = state;
5147 return;
5148}
5149
Owen Taylor3473f882001-02-23 17:55:21 +00005150
5151/**
5152 * xmlParsePITarget:
5153 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005154 *
Owen Taylor3473f882001-02-23 17:55:21 +00005155 * parse the name of a PI
5156 *
5157 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5158 *
5159 * Returns the PITarget name or NULL
5160 */
5161
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005162const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005163xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005164 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005165
5166 name = xmlParseName(ctxt);
5167 if ((name != NULL) &&
5168 ((name[0] == 'x') || (name[0] == 'X')) &&
5169 ((name[1] == 'm') || (name[1] == 'M')) &&
5170 ((name[2] == 'l') || (name[2] == 'L'))) {
5171 int i;
5172 if ((name[0] == 'x') && (name[1] == 'm') &&
5173 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005174 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005175 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005176 return(name);
5177 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005178 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005179 return(name);
5180 }
5181 for (i = 0;;i++) {
5182 if (xmlW3CPIs[i] == NULL) break;
5183 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5184 return(name);
5185 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005186 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5187 "xmlParsePITarget: invalid name prefix 'xml'\n",
5188 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005189 }
Daniel Veillard37334572008-07-31 08:20:02 +00005190 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005191 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005192 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005193 }
Owen Taylor3473f882001-02-23 17:55:21 +00005194 return(name);
5195}
5196
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction:
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the word "catalog", an '=' sign and a single
 * or double quoted URL, with optional blanks around each part and
 * nothing else after the closing quote. On success the URL is added to
 * the catalogs of the parsing context through xmlCatalogAddLocal().
 *
 * A malformed value raises an XML_WAR_CATALOG_PI warning, except when
 * the '=' sign is missing: in that case the function returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' separator; its absence is not reported */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* opening quote, which also defines the closing one */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* quoted URL */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
5258
Owen Taylor3473f882001-02-23 17:55:21 +00005259/**
5260 * xmlParsePI:
5261 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005262 *
Owen Taylor3473f882001-02-23 17:55:21 +00005263 * parse an XML Processing Instruction.
5264 *
5265 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5266 *
5267 * The processing is transfered to SAX once parsed.
5268 */
5269
5270void
5271xmlParsePI(xmlParserCtxtPtr ctxt) {
5272 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005273 size_t len = 0;
5274 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005275 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005276 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005277 xmlParserInputState state;
5278 int count = 0;
5279
5280 if ((RAW == '<') && (NXT(1) == '?')) {
5281 xmlParserInputPtr input = ctxt->input;
5282 state = ctxt->instate;
5283 ctxt->instate = XML_PARSER_PI;
5284 /*
5285 * this is a Processing Instruction.
5286 */
5287 SKIP(2);
5288 SHRINK;
5289
5290 /*
5291 * Parse the target name and check for special support like
5292 * namespace.
5293 */
5294 target = xmlParsePITarget(ctxt);
5295 if (target != NULL) {
5296 if ((RAW == '?') && (NXT(1) == '>')) {
5297 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005298 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005300 }
5301 SKIP(2);
5302
5303 /*
5304 * SAX: PI detected.
5305 */
5306 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5307 (ctxt->sax->processingInstruction != NULL))
5308 ctxt->sax->processingInstruction(ctxt->userData,
5309 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005310 if (ctxt->instate != XML_PARSER_EOF)
5311 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005312 return;
5313 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005314 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005315 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005316 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005317 ctxt->instate = state;
5318 return;
5319 }
5320 cur = CUR;
5321 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005322 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5323 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005324 }
5325 SKIP_BLANKS;
5326 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005327 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005328 ((cur != '?') || (NXT(1) != '>'))) {
5329 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005330 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005331 size_t new_size = size * 2;
5332 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005333 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005334 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005335 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005336 ctxt->instate = state;
5337 return;
5338 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005339 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005340 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005341 }
5342 count++;
5343 if (count > 50) {
5344 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005345 if (ctxt->instate == XML_PARSER_EOF) {
5346 xmlFree(buf);
5347 return;
5348 }
Owen Taylor3473f882001-02-23 17:55:21 +00005349 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005350 if ((len > XML_MAX_TEXT_LENGTH) &&
5351 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5352 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5353 "PI %s too big found", target);
5354 xmlFree(buf);
5355 ctxt->instate = state;
5356 return;
5357 }
Owen Taylor3473f882001-02-23 17:55:21 +00005358 }
5359 COPY_BUF(l,buf,len,cur);
5360 NEXTL(l);
5361 cur = CUR_CHAR(l);
5362 if (cur == 0) {
5363 SHRINK;
5364 GROW;
5365 cur = CUR_CHAR(l);
5366 }
5367 }
Daniel Veillard51304812012-07-19 20:34:26 +08005368 if ((len > XML_MAX_TEXT_LENGTH) &&
5369 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5370 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5371 "PI %s too big found", target);
5372 xmlFree(buf);
5373 ctxt->instate = state;
5374 return;
5375 }
Owen Taylor3473f882001-02-23 17:55:21 +00005376 buf[len] = 0;
5377 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005378 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5379 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005380 } else {
5381 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005382 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5383 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005384 }
5385 SKIP(2);
5386
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005387#ifdef LIBXML_CATALOG_ENABLED
5388 if (((state == XML_PARSER_MISC) ||
5389 (state == XML_PARSER_START)) &&
5390 (xmlStrEqual(target, XML_CATALOG_PI))) {
5391 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5392 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5393 (allow == XML_CATA_ALLOW_ALL))
5394 xmlParseCatalogPI(ctxt, buf);
5395 }
5396#endif
5397
5398
Owen Taylor3473f882001-02-23 17:55:21 +00005399 /*
5400 * SAX: PI detected.
5401 */
5402 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5403 (ctxt->sax->processingInstruction != NULL))
5404 ctxt->sax->processingInstruction(ctxt->userData,
5405 target, buf);
5406 }
5407 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005408 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005409 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005410 }
Chris Evans77404b82011-12-14 16:18:25 +08005411 if (ctxt->instate != XML_PARSER_EOF)
5412 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005413 }
5414}
5415
5416/**
5417 * xmlParseNotationDecl:
5418 * @ctxt: an XML parser context
5419 *
5420 * parse a notation declaration
5421 *
5422 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5423 *
5424 * Hence there is actually 3 choices:
5425 * 'PUBLIC' S PubidLiteral
5426 * 'PUBLIC' S PubidLiteral S SystemLiteral
5427 * and 'SYSTEM' S SystemLiteral
5428 *
5429 * See the NOTE on xmlParseExternalID().
5430 */
5431
5432void
5433xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005434 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005435 xmlChar *Pubid;
5436 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005437
Daniel Veillarda07050d2003-10-19 14:46:32 +00005438 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005439 xmlParserInputPtr input = ctxt->input;
5440 SHRINK;
5441 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005442 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005443 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5444 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005445 return;
5446 }
5447 SKIP_BLANKS;
5448
Daniel Veillard76d66f42001-05-16 21:05:17 +00005449 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005450 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005451 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005452 return;
5453 }
William M. Brack76e95df2003-10-18 16:20:14 +00005454 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005455 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005456 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005457 return;
5458 }
Daniel Veillard37334572008-07-31 08:20:02 +00005459 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005460 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005461 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005462 name, NULL, NULL);
5463 }
Owen Taylor3473f882001-02-23 17:55:21 +00005464 SKIP_BLANKS;
5465
5466 /*
5467 * Parse the IDs.
5468 */
5469 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5470 SKIP_BLANKS;
5471
5472 if (RAW == '>') {
5473 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005474 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5475 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005476 }
5477 NEXT;
5478 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5479 (ctxt->sax->notationDecl != NULL))
5480 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5481 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005482 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005483 }
Owen Taylor3473f882001-02-23 17:55:21 +00005484 if (Systemid != NULL) xmlFree(Systemid);
5485 if (Pubid != NULL) xmlFree(Pubid);
5486 }
5487}
5488
5489/**
5490 * xmlParseEntityDecl:
5491 * @ctxt: an XML parser context
5492 *
5493 * parse <!ENTITY declarations
5494 *
5495 * [70] EntityDecl ::= GEDecl | PEDecl
5496 *
5497 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5498 *
5499 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5500 *
5501 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5502 *
5503 * [74] PEDef ::= EntityValue | ExternalID
5504 *
5505 * [76] NDataDecl ::= S 'NDATA' S Name
5506 *
5507 * [ VC: Notation Declared ]
5508 * The Name must match the declared name of a notation.
5509 */
5510
5511void
5512xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005513 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005514 xmlChar *value = NULL;
5515 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005516 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005517 int isParameter = 0;
5518 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005519 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005520
Daniel Veillard4c778d82005-01-23 17:37:44 +00005521 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005522 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005523 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005524 SHRINK;
5525 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005526 skipped = SKIP_BLANKS;
5527 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005528 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5529 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005530 }
Owen Taylor3473f882001-02-23 17:55:21 +00005531
5532 if (RAW == '%') {
5533 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005534 skipped = SKIP_BLANKS;
5535 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005536 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005537 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
Owen Taylor3473f882001-02-23 17:55:21 +00005539 isParameter = 1;
5540 }
5541
Daniel Veillard76d66f42001-05-16 21:05:17 +00005542 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005543 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005544 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5545 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005546 return;
5547 }
Daniel Veillard37334572008-07-31 08:20:02 +00005548 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005549 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005550 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005551 name, NULL, NULL);
5552 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005553 skipped = SKIP_BLANKS;
5554 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005555 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5556 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005557 }
Owen Taylor3473f882001-02-23 17:55:21 +00005558
Daniel Veillardf5582f12002-06-11 10:08:16 +00005559 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005560 /*
5561 * handle the various case of definitions...
5562 */
5563 if (isParameter) {
5564 if ((RAW == '"') || (RAW == '\'')) {
5565 value = xmlParseEntityValue(ctxt, &orig);
5566 if (value) {
5567 if ((ctxt->sax != NULL) &&
5568 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5569 ctxt->sax->entityDecl(ctxt->userData, name,
5570 XML_INTERNAL_PARAMETER_ENTITY,
5571 NULL, NULL, value);
5572 }
5573 } else {
5574 URI = xmlParseExternalID(ctxt, &literal, 1);
5575 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005576 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005577 }
5578 if (URI) {
5579 xmlURIPtr uri;
5580
5581 uri = xmlParseURI((const char *) URI);
5582 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005583 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5584 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005585 /*
5586 * This really ought to be a well formedness error
5587 * but the XML Core WG decided otherwise c.f. issue
5588 * E26 of the XML erratas.
5589 */
Owen Taylor3473f882001-02-23 17:55:21 +00005590 } else {
5591 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005592 /*
5593 * Okay this is foolish to block those but not
5594 * invalid URIs.
5595 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005596 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005597 } else {
5598 if ((ctxt->sax != NULL) &&
5599 (!ctxt->disableSAX) &&
5600 (ctxt->sax->entityDecl != NULL))
5601 ctxt->sax->entityDecl(ctxt->userData, name,
5602 XML_EXTERNAL_PARAMETER_ENTITY,
5603 literal, URI, NULL);
5604 }
5605 xmlFreeURI(uri);
5606 }
5607 }
5608 }
5609 } else {
5610 if ((RAW == '"') || (RAW == '\'')) {
5611 value = xmlParseEntityValue(ctxt, &orig);
5612 if ((ctxt->sax != NULL) &&
5613 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5614 ctxt->sax->entityDecl(ctxt->userData, name,
5615 XML_INTERNAL_GENERAL_ENTITY,
5616 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005617 /*
5618 * For expat compatibility in SAX mode.
5619 */
5620 if ((ctxt->myDoc == NULL) ||
5621 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5622 if (ctxt->myDoc == NULL) {
5623 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005624 if (ctxt->myDoc == NULL) {
5625 xmlErrMemory(ctxt, "New Doc failed");
5626 return;
5627 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005628 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005629 }
5630 if (ctxt->myDoc->intSubset == NULL)
5631 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5632 BAD_CAST "fake", NULL, NULL);
5633
Daniel Veillard1af9a412003-08-20 22:54:39 +00005634 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5635 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005636 }
Owen Taylor3473f882001-02-23 17:55:21 +00005637 } else {
5638 URI = xmlParseExternalID(ctxt, &literal, 1);
5639 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005640 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005641 }
5642 if (URI) {
5643 xmlURIPtr uri;
5644
5645 uri = xmlParseURI((const char *)URI);
5646 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005647 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5648 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005649 /*
5650 * This really ought to be a well formedness error
5651 * but the XML Core WG decided otherwise c.f. issue
5652 * E26 of the XML erratas.
5653 */
Owen Taylor3473f882001-02-23 17:55:21 +00005654 } else {
5655 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005656 /*
5657 * Okay this is foolish to block those but not
5658 * invalid URIs.
5659 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005660 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005661 }
5662 xmlFreeURI(uri);
5663 }
5664 }
William M. Brack76e95df2003-10-18 16:20:14 +00005665 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5667 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005670 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005671 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005672 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005673 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005675 }
5676 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005677 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005678 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5679 (ctxt->sax->unparsedEntityDecl != NULL))
5680 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5681 literal, URI, ndata);
5682 } else {
5683 if ((ctxt->sax != NULL) &&
5684 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5685 ctxt->sax->entityDecl(ctxt->userData, name,
5686 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5687 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005688 /*
5689 * For expat compatibility in SAX mode.
5690 * assuming the entity repalcement was asked for
5691 */
5692 if ((ctxt->replaceEntities != 0) &&
5693 ((ctxt->myDoc == NULL) ||
5694 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5695 if (ctxt->myDoc == NULL) {
5696 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005697 if (ctxt->myDoc == NULL) {
5698 xmlErrMemory(ctxt, "New Doc failed");
5699 return;
5700 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005701 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005702 }
5703
5704 if (ctxt->myDoc->intSubset == NULL)
5705 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5706 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005707 xmlSAX2EntityDecl(ctxt, name,
5708 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5709 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005710 }
Owen Taylor3473f882001-02-23 17:55:21 +00005711 }
5712 }
5713 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005714 if (ctxt->instate == XML_PARSER_EOF)
5715 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005716 SKIP_BLANKS;
5717 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005718 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005719 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005720 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005721 } else {
5722 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005723 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5724 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005725 }
5726 NEXT;
5727 }
5728 if (orig != NULL) {
5729 /*
5730 * Ugly mechanism to save the raw entity value.
5731 */
5732 xmlEntityPtr cur = NULL;
5733
5734 if (isParameter) {
5735 if ((ctxt->sax != NULL) &&
5736 (ctxt->sax->getParameterEntity != NULL))
5737 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5738 } else {
5739 if ((ctxt->sax != NULL) &&
5740 (ctxt->sax->getEntity != NULL))
5741 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005742 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005743 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005744 }
Owen Taylor3473f882001-02-23 17:55:21 +00005745 }
5746 if (cur != NULL) {
5747 if (cur->orig != NULL)
5748 xmlFree(orig);
5749 else
5750 cur->orig = orig;
5751 } else
5752 xmlFree(orig);
5753 }
Owen Taylor3473f882001-02-23 17:55:21 +00005754 if (value != NULL) xmlFree(value);
5755 if (URI != NULL) xmlFree(URI);
5756 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005757 }
5758}
5759
5760/**
5761 * xmlParseDefaultDecl:
5762 * @ctxt: an XML parser context
5763 * @value: Receive a possible fixed default value for the attribute
5764 *
5765 * Parse an attribute default declaration
5766 *
5767 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5768 *
5769 * [ VC: Required Attribute ]
5770 * if the default declaration is the keyword #REQUIRED, then the
5771 * attribute must be specified for all elements of the type in the
5772 * attribute-list declaration.
5773 *
5774 * [ VC: Attribute Default Legal ]
5775 * The declared default value must meet the lexical constraints of
5776 * the declared attribute type c.f. xmlValidateAttributeDecl()
5777 *
5778 * [ VC: Fixed Attribute Default ]
5779 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005780 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005781 *
5782 * [ WFC: No < in Attribute Values ]
5783 * handled in xmlParseAttValue()
5784 *
5785 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005786 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005787 */
5788
5789int
5790xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5791 int val;
5792 xmlChar *ret;
5793
5794 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005795 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005796 SKIP(9);
5797 return(XML_ATTRIBUTE_REQUIRED);
5798 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005799 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005800 SKIP(8);
5801 return(XML_ATTRIBUTE_IMPLIED);
5802 }
5803 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005804 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005805 SKIP(6);
5806 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005807 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005808 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5809 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005810 }
5811 SKIP_BLANKS;
5812 }
5813 ret = xmlParseAttValue(ctxt);
5814 ctxt->instate = XML_PARSER_DTD;
5815 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005816 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005817 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005818 } else
5819 *value = ret;
5820 return(val);
5821}
5822
5823/**
5824 * xmlParseNotationType:
5825 * @ctxt: an XML parser context
5826 *
5827 * parse an Notation attribute type.
5828 *
5829 * Note: the leading 'NOTATION' S part has already being parsed...
5830 *
5831 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5832 *
5833 * [ VC: Notation Attributes ]
5834 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005835 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005836 *
5837 * Returns: the notation attribute tree built while parsing
5838 */
5839
5840xmlEnumerationPtr
5841xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005842 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005843 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005844
5845 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005846 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005847 return(NULL);
5848 }
5849 SHRINK;
5850 do {
5851 NEXT;
5852 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005853 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005854 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005855 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5856 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005857 xmlFreeEnumeration(ret);
5858 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005859 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005860 tmp = ret;
5861 while (tmp != NULL) {
5862 if (xmlStrEqual(name, tmp->name)) {
5863 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5864 "standalone: attribute notation value token %s duplicated\n",
5865 name, NULL);
5866 if (!xmlDictOwns(ctxt->dict, name))
5867 xmlFree((xmlChar *) name);
5868 break;
5869 }
5870 tmp = tmp->next;
5871 }
5872 if (tmp == NULL) {
5873 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005874 if (cur == NULL) {
5875 xmlFreeEnumeration(ret);
5876 return(NULL);
5877 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005878 if (last == NULL) ret = last = cur;
5879 else {
5880 last->next = cur;
5881 last = cur;
5882 }
Owen Taylor3473f882001-02-23 17:55:21 +00005883 }
5884 SKIP_BLANKS;
5885 } while (RAW == '|');
5886 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005887 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005888 xmlFreeEnumeration(ret);
5889 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005890 }
5891 NEXT;
5892 return(ret);
5893}
5894
5895/**
5896 * xmlParseEnumerationType:
5897 * @ctxt: an XML parser context
5898 *
5899 * parse an Enumeration attribute type.
5900 *
5901 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5902 *
5903 * [ VC: Enumeration ]
5904 * Values of this type must match one of the Nmtoken tokens in
5905 * the declaration
5906 *
5907 * Returns: the enumeration attribute tree built while parsing
5908 */
5909
5910xmlEnumerationPtr
5911xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5912 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005913 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005914
5915 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005916 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005917 return(NULL);
5918 }
5919 SHRINK;
5920 do {
5921 NEXT;
5922 SKIP_BLANKS;
5923 name = xmlParseNmtoken(ctxt);
5924 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005925 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005926 return(ret);
5927 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005928 tmp = ret;
5929 while (tmp != NULL) {
5930 if (xmlStrEqual(name, tmp->name)) {
5931 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5932 "standalone: attribute enumeration value token %s duplicated\n",
5933 name, NULL);
5934 if (!xmlDictOwns(ctxt->dict, name))
5935 xmlFree(name);
5936 break;
5937 }
5938 tmp = tmp->next;
5939 }
5940 if (tmp == NULL) {
5941 cur = xmlCreateEnumeration(name);
5942 if (!xmlDictOwns(ctxt->dict, name))
5943 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005944 if (cur == NULL) {
5945 xmlFreeEnumeration(ret);
5946 return(NULL);
5947 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005948 if (last == NULL) ret = last = cur;
5949 else {
5950 last->next = cur;
5951 last = cur;
5952 }
Owen Taylor3473f882001-02-23 17:55:21 +00005953 }
5954 SKIP_BLANKS;
5955 } while (RAW == '|');
5956 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005957 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 return(ret);
5959 }
5960 NEXT;
5961 return(ret);
5962}
5963
5964/**
5965 * xmlParseEnumeratedType:
5966 * @ctxt: an XML parser context
5967 * @tree: the enumeration tree built while parsing
5968 *
5969 * parse an Enumerated attribute type.
5970 *
5971 * [57] EnumeratedType ::= NotationType | Enumeration
5972 *
5973 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5974 *
5975 *
5976 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5977 */
5978
5979int
5980xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005981 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005982 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005983 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005984 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5985 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005986 return(0);
5987 }
5988 SKIP_BLANKS;
5989 *tree = xmlParseNotationType(ctxt);
5990 if (*tree == NULL) return(0);
5991 return(XML_ATTRIBUTE_NOTATION);
5992 }
5993 *tree = xmlParseEnumerationType(ctxt);
5994 if (*tree == NULL) return(0);
5995 return(XML_ATTRIBUTE_ENUMERATION);
5996}
5997
5998/**
5999 * xmlParseAttributeType:
6000 * @ctxt: an XML parser context
6001 * @tree: the enumeration tree built while parsing
6002 *
6003 * parse the Attribute list def for an element
6004 *
6005 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
6006 *
6007 * [55] StringType ::= 'CDATA'
6008 *
6009 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6010 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6011 *
6012 * Validity constraints for attribute values syntax are checked in
6013 * xmlValidateAttributeValue()
6014 *
6015 * [ VC: ID ]
6016 * Values of type ID must match the Name production. A name must not
6017 * appear more than once in an XML document as a value of this type;
6018 * i.e., ID values must uniquely identify the elements which bear them.
6019 *
6020 * [ VC: One ID per Element Type ]
6021 * No element type may have more than one ID attribute specified.
6022 *
6023 * [ VC: ID Attribute Default ]
6024 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6025 *
6026 * [ VC: IDREF ]
6027 * Values of type IDREF must match the Name production, and values
6028 * of type IDREFS must match Names; each IDREF Name must match the value
6029 * of an ID attribute on some element in the XML document; i.e. IDREF
6030 * values must match the value of some ID attribute.
6031 *
6032 * [ VC: Entity Name ]
6033 * Values of type ENTITY must match the Name production, values
6034 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006035 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00006036 *
6037 * [ VC: Name Token ]
6038 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006039 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00006040 *
6041 * Returns the attribute type
6042 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006043int
Owen Taylor3473f882001-02-23 17:55:21 +00006044xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6045 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006046 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006047 SKIP(5);
6048 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006049 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006050 SKIP(6);
6051 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006052 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006053 SKIP(5);
6054 return(XML_ATTRIBUTE_IDREF);
6055 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6056 SKIP(2);
6057 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006058 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006059 SKIP(6);
6060 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006061 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006062 SKIP(8);
6063 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006064 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006065 SKIP(8);
6066 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006067 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006068 SKIP(7);
6069 return(XML_ATTRIBUTE_NMTOKEN);
6070 }
6071 return(xmlParseEnumeratedType(ctxt, tree));
6072}
6073
6074/**
6075 * xmlParseAttributeListDecl:
6076 * @ctxt: an XML parser context
6077 *
6078 * : parse the Attribute list def for an element
6079 *
6080 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6081 *
6082 * [53] AttDef ::= S Name S AttType S DefaultDecl
6083 *
6084 */
6085void
6086xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006087 const xmlChar *elemName;
6088 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00006089 xmlEnumerationPtr tree;
6090
Daniel Veillarda07050d2003-10-19 14:46:32 +00006091 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006092 xmlParserInputPtr input = ctxt->input;
6093
6094 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006095 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006096 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006097 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006098 }
6099 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006100 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006102 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6103 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006104 return;
6105 }
6106 SKIP_BLANKS;
6107 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006108 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006109 const xmlChar *check = CUR_PTR;
6110 int type;
6111 int def;
6112 xmlChar *defaultValue = NULL;
6113
6114 GROW;
6115 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006116 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006117 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006118 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6119 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006120 break;
6121 }
6122 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006123 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006124 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006125 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006126 break;
6127 }
6128 SKIP_BLANKS;
6129
6130 type = xmlParseAttributeType(ctxt, &tree);
6131 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006132 break;
6133 }
6134
6135 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006136 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006137 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6138 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006139 if (tree != NULL)
6140 xmlFreeEnumeration(tree);
6141 break;
6142 }
6143 SKIP_BLANKS;
6144
6145 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6146 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006147 if (defaultValue != NULL)
6148 xmlFree(defaultValue);
6149 if (tree != NULL)
6150 xmlFreeEnumeration(tree);
6151 break;
6152 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006153 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6154 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006155
6156 GROW;
6157 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006158 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006159 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006160 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006161 if (defaultValue != NULL)
6162 xmlFree(defaultValue);
6163 if (tree != NULL)
6164 xmlFreeEnumeration(tree);
6165 break;
6166 }
6167 SKIP_BLANKS;
6168 }
6169 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006170 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6171 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006172 if (defaultValue != NULL)
6173 xmlFree(defaultValue);
6174 if (tree != NULL)
6175 xmlFreeEnumeration(tree);
6176 break;
6177 }
6178 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6179 (ctxt->sax->attributeDecl != NULL))
6180 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6181 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006182 else if (tree != NULL)
6183 xmlFreeEnumeration(tree);
6184
6185 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006186 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006187 (def != XML_ATTRIBUTE_REQUIRED)) {
6188 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6189 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006190 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006191 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6192 }
Owen Taylor3473f882001-02-23 17:55:21 +00006193 if (defaultValue != NULL)
6194 xmlFree(defaultValue);
6195 GROW;
6196 }
6197 if (RAW == '>') {
6198 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006199 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6200 "Attribute list declaration doesn't start and stop in the same entity\n",
6201 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006202 }
6203 NEXT;
6204 }
Owen Taylor3473f882001-02-23 17:55:21 +00006205 }
6206}
6207
6208/**
6209 * xmlParseElementMixedContentDecl:
6210 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006211 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006212 *
6213 * parse the declaration for a Mixed Element content
6214 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006215 *
Owen Taylor3473f882001-02-23 17:55:21 +00006216 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6217 * '(' S? '#PCDATA' S? ')'
6218 *
6219 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6220 *
6221 * [ VC: No Duplicate Types ]
6222 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006223 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006224 *
6225 * returns: the list of the xmlElementContentPtr describing the element choices
6226 */
6227xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006228xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006229 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006230 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006231
6232 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006233 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006234 SKIP(7);
6235 SKIP_BLANKS;
6236 SHRINK;
6237 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006238 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006239 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6240"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006241 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006242 }
Owen Taylor3473f882001-02-23 17:55:21 +00006243 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006244 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006245 if (ret == NULL)
6246 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006247 if (RAW == '*') {
6248 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6249 NEXT;
6250 }
6251 return(ret);
6252 }
6253 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006254 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006255 if (ret == NULL) return(NULL);
6256 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006257 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006258 NEXT;
6259 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006260 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006261 if (ret == NULL) return(NULL);
6262 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006263 if (cur != NULL)
6264 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006265 cur = ret;
6266 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006267 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006268 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006269 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006270 if (n->c1 != NULL)
6271 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006272 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006273 if (n != NULL)
6274 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006275 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006276 }
6277 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006278 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006279 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006280 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006281 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006282 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006283 return(NULL);
6284 }
6285 SKIP_BLANKS;
6286 GROW;
6287 }
6288 if ((RAW == ')') && (NXT(1) == '*')) {
6289 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006290 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006291 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006292 if (cur->c2 != NULL)
6293 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006294 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006295 if (ret != NULL)
6296 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006297 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006298 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6299"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006300 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006301 }
Owen Taylor3473f882001-02-23 17:55:21 +00006302 SKIP(2);
6303 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006304 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006305 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 return(NULL);
6307 }
6308
6309 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006310 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006311 }
6312 return(ret);
6313}
6314
6315/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006316 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006317 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006318 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006319 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006320 *
6321 * parse the declaration for a Mixed Element content
6322 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006323 *
Owen Taylor3473f882001-02-23 17:55:21 +00006324 *
6325 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6326 *
6327 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6328 *
6329 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6330 *
6331 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6332 *
6333 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6334 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006335 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006336 * opening or closing parentheses in a choice, seq, or Mixed
6337 * construct is contained in the replacement text for a parameter
6338 * entity, both must be contained in the same replacement text. For
6339 * interoperability, if a parameter-entity reference appears in a
6340 * choice, seq, or Mixed construct, its replacement text should not
6341 * be empty, and neither the first nor last non-blank character of
6342 * the replacement text should be a connector (| or ,).
6343 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006344 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006345 * hierarchy.
6346 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006347static xmlElementContentPtr
6348xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6349 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006350 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006351 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006352 xmlChar type = 0;
6353
Daniel Veillard489f9672009-08-10 16:49:30 +02006354 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6355 (depth > 2048)) {
6356 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6357"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6358 depth);
6359 return(NULL);
6360 }
Owen Taylor3473f882001-02-23 17:55:21 +00006361 SKIP_BLANKS;
6362 GROW;
6363 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006364 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006365
Owen Taylor3473f882001-02-23 17:55:21 +00006366 /* Recurse on first child */
6367 NEXT;
6368 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006369 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6370 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006371 SKIP_BLANKS;
6372 GROW;
6373 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006374 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006375 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006376 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006377 return(NULL);
6378 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006379 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006380 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006381 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006382 return(NULL);
6383 }
Owen Taylor3473f882001-02-23 17:55:21 +00006384 GROW;
6385 if (RAW == '?') {
6386 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6387 NEXT;
6388 } else if (RAW == '*') {
6389 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6390 NEXT;
6391 } else if (RAW == '+') {
6392 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6393 NEXT;
6394 } else {
6395 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6396 }
Owen Taylor3473f882001-02-23 17:55:21 +00006397 GROW;
6398 }
6399 SKIP_BLANKS;
6400 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006401 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006402 /*
6403 * Each loop we parse one separator and one element.
6404 */
6405 if (RAW == ',') {
6406 if (type == 0) type = CUR;
6407
6408 /*
6409 * Detect "Name | Name , Name" error
6410 */
6411 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006412 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006413 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006414 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006415 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006416 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006417 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006418 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006419 return(NULL);
6420 }
6421 NEXT;
6422
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006423 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006424 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006425 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006426 xmlFreeDocElementContent(ctxt->myDoc, last);
6427 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006428 return(NULL);
6429 }
6430 if (last == NULL) {
6431 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006432 if (ret != NULL)
6433 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006434 ret = cur = op;
6435 } else {
6436 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006437 if (op != NULL)
6438 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006439 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006440 if (last != NULL)
6441 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006442 cur =op;
6443 last = NULL;
6444 }
6445 } else if (RAW == '|') {
6446 if (type == 0) type = CUR;
6447
6448 /*
6449 * Detect "Name , Name | Name" error
6450 */
6451 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006452 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006453 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006454 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006455 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006456 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006457 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006458 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006459 return(NULL);
6460 }
6461 NEXT;
6462
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006463 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006464 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006465 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006466 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006467 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006468 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006469 return(NULL);
6470 }
6471 if (last == NULL) {
6472 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006473 if (ret != NULL)
6474 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006475 ret = cur = op;
6476 } else {
6477 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006478 if (op != NULL)
6479 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006480 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006481 if (last != NULL)
6482 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006483 cur =op;
6484 last = NULL;
6485 }
6486 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006487 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006488 if ((last != NULL) && (last != ret))
6489 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006490 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006491 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006492 return(NULL);
6493 }
6494 GROW;
6495 SKIP_BLANKS;
6496 GROW;
6497 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006498 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006499 /* Recurse on second child */
6500 NEXT;
6501 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006502 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6503 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006504 SKIP_BLANKS;
6505 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006506 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006507 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006508 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006509 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006510 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006511 return(NULL);
6512 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006513 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006514 if (last == NULL) {
6515 if (ret != NULL)
6516 xmlFreeDocElementContent(ctxt->myDoc, ret);
6517 return(NULL);
6518 }
Owen Taylor3473f882001-02-23 17:55:21 +00006519 if (RAW == '?') {
6520 last->ocur = XML_ELEMENT_CONTENT_OPT;
6521 NEXT;
6522 } else if (RAW == '*') {
6523 last->ocur = XML_ELEMENT_CONTENT_MULT;
6524 NEXT;
6525 } else if (RAW == '+') {
6526 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6527 NEXT;
6528 } else {
6529 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6530 }
6531 }
6532 SKIP_BLANKS;
6533 GROW;
6534 }
6535 if ((cur != NULL) && (last != NULL)) {
6536 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006537 if (last != NULL)
6538 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006539 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006540 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006541 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6542"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006543 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006544 }
Owen Taylor3473f882001-02-23 17:55:21 +00006545 NEXT;
6546 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006547 if (ret != NULL) {
6548 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6549 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6550 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6551 else
6552 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6553 }
Owen Taylor3473f882001-02-23 17:55:21 +00006554 NEXT;
6555 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006556 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006557 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006558 cur = ret;
6559 /*
6560 * Some normalization:
6561 * (a | b* | c?)* == (a | b | c)*
6562 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006563 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006564 if ((cur->c1 != NULL) &&
6565 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6566 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6567 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6568 if ((cur->c2 != NULL) &&
6569 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6570 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6571 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6572 cur = cur->c2;
6573 }
6574 }
Owen Taylor3473f882001-02-23 17:55:21 +00006575 NEXT;
6576 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006577 if (ret != NULL) {
6578 int found = 0;
6579
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006580 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6581 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6582 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006583 else
6584 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006585 /*
6586 * Some normalization:
6587 * (a | b*)+ == (a | b)*
6588 * (a | b?)+ == (a | b)*
6589 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006590 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006591 if ((cur->c1 != NULL) &&
6592 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6593 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6594 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6595 found = 1;
6596 }
6597 if ((cur->c2 != NULL) &&
6598 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6599 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6600 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6601 found = 1;
6602 }
6603 cur = cur->c2;
6604 }
6605 if (found)
6606 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6607 }
Owen Taylor3473f882001-02-23 17:55:21 +00006608 NEXT;
6609 }
6610 return(ret);
6611}
6612
6613/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006614 * xmlParseElementChildrenContentDecl:
6615 * @ctxt: an XML parser context
6616 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006617 *
6618 * parse the declaration for a Mixed Element content
6619 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6620 *
6621 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6622 *
6623 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6624 *
6625 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6626 *
6627 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6628 *
6629 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6630 * TODO Parameter-entity replacement text must be properly nested
6631 * with parenthesized groups. That is to say, if either of the
6632 * opening or closing parentheses in a choice, seq, or Mixed
6633 * construct is contained in the replacement text for a parameter
6634 * entity, both must be contained in the same replacement text. For
6635 * interoperability, if a parameter-entity reference appears in a
6636 * choice, seq, or Mixed construct, its replacement text should not
6637 * be empty, and neither the first nor last non-blank character of
6638 * the replacement text should be a connector (| or ,).
6639 *
6640 * Returns the tree of xmlElementContentPtr describing the element
6641 * hierarchy.
6642 */
6643xmlElementContentPtr
6644xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6645 /* stub left for API/ABI compat */
6646 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6647}
6648
6649/**
Owen Taylor3473f882001-02-23 17:55:21 +00006650 * xmlParseElementContentDecl:
6651 * @ctxt: an XML parser context
6652 * @name: the name of the element being defined.
6653 * @result: the Element Content pointer will be stored here if any
6654 *
6655 * parse the declaration for an Element content either Mixed or Children,
6656 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006657 *
Owen Taylor3473f882001-02-23 17:55:21 +00006658 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6659 *
6660 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6661 */
6662
6663int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006664xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006665 xmlElementContentPtr *result) {
6666
6667 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006668 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006669 int res;
6670
6671 *result = NULL;
6672
6673 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006674 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006675 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006676 return(-1);
6677 }
6678 NEXT;
6679 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006680 if (ctxt->instate == XML_PARSER_EOF)
6681 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006682 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006683 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006684 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006685 res = XML_ELEMENT_TYPE_MIXED;
6686 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006687 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006688 res = XML_ELEMENT_TYPE_ELEMENT;
6689 }
Owen Taylor3473f882001-02-23 17:55:21 +00006690 SKIP_BLANKS;
6691 *result = tree;
6692 return(res);
6693}
6694
6695/**
6696 * xmlParseElementDecl:
6697 * @ctxt: an XML parser context
6698 *
6699 * parse an Element declaration.
6700 *
6701 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6702 *
6703 * [ VC: Unique Element Type Declaration ]
6704 * No element type may be declared more than once
6705 *
6706 * Returns the type of the element, or -1 in case of error
6707 */
6708int
6709xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006710 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006711 int ret = -1;
6712 xmlElementContentPtr content = NULL;
6713
Daniel Veillard4c778d82005-01-23 17:37:44 +00006714 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006715 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006716 xmlParserInputPtr input = ctxt->input;
6717
6718 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006719 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006720 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6721 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006722 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006723 }
6724 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006725 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006726 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006727 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6728 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006729 return(-1);
6730 }
6731 while ((RAW == 0) && (ctxt->inputNr > 1))
6732 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006733 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006734 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6735 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006736 }
6737 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006738 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006739 SKIP(5);
6740 /*
6741 * Element must always be empty.
6742 */
6743 ret = XML_ELEMENT_TYPE_EMPTY;
6744 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6745 (NXT(2) == 'Y')) {
6746 SKIP(3);
6747 /*
6748 * Element is a generic container.
6749 */
6750 ret = XML_ELEMENT_TYPE_ANY;
6751 } else if (RAW == '(') {
6752 ret = xmlParseElementContentDecl(ctxt, name, &content);
6753 } else {
6754 /*
6755 * [ WFC: PEs in Internal Subset ] error handling.
6756 */
6757 if ((RAW == '%') && (ctxt->external == 0) &&
6758 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006759 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006760 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006761 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006762 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006763 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6764 }
Owen Taylor3473f882001-02-23 17:55:21 +00006765 return(-1);
6766 }
6767
6768 SKIP_BLANKS;
6769 /*
6770 * Pop-up of finished entities.
6771 */
6772 while ((RAW == 0) && (ctxt->inputNr > 1))
6773 xmlPopInput(ctxt);
6774 SKIP_BLANKS;
6775
6776 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006777 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006778 if (content != NULL) {
6779 xmlFreeDocElementContent(ctxt->myDoc, content);
6780 }
Owen Taylor3473f882001-02-23 17:55:21 +00006781 } else {
6782 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006783 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6784 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006785 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006786
Owen Taylor3473f882001-02-23 17:55:21 +00006787 NEXT;
6788 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006789 (ctxt->sax->elementDecl != NULL)) {
6790 if (content != NULL)
6791 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006792 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6793 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006794 if ((content != NULL) && (content->parent == NULL)) {
6795 /*
6796 * this is a trick: if xmlAddElementDecl is called,
6797 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006798 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006799 * interfaces or change the API/ABI
6800 */
6801 xmlFreeDocElementContent(ctxt->myDoc, content);
6802 }
6803 } else if (content != NULL) {
6804 xmlFreeDocElementContent(ctxt->myDoc, content);
6805 }
Owen Taylor3473f882001-02-23 17:55:21 +00006806 }
Owen Taylor3473f882001-02-23 17:55:21 +00006807 }
6808 return(ret);
6809}
6810
6811/**
Owen Taylor3473f882001-02-23 17:55:21 +00006812 * xmlParseConditionalSections
6813 * @ctxt: an XML parser context
6814 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006815 * [61] conditionalSect ::= includeSect | ignoreSect
6816 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006817 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6818 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6819 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6820 */
6821
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006822static void
Owen Taylor3473f882001-02-23 17:55:21 +00006823xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006824 int id = ctxt->input->id;
6825
Owen Taylor3473f882001-02-23 17:55:21 +00006826 SKIP(3);
6827 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006828 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006829 SKIP(7);
6830 SKIP_BLANKS;
6831 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006832 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006833 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006834 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006835 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006836 if (ctxt->input->id != id) {
6837 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6838 "All markup of the conditional section is not in the same entity\n",
6839 NULL, NULL);
6840 }
Owen Taylor3473f882001-02-23 17:55:21 +00006841 NEXT;
6842 }
6843 if (xmlParserDebugEntities) {
6844 if ((ctxt->input != NULL) && (ctxt->input->filename))
6845 xmlGenericError(xmlGenericErrorContext,
6846 "%s(%d): ", ctxt->input->filename,
6847 ctxt->input->line);
6848 xmlGenericError(xmlGenericErrorContext,
6849 "Entering INCLUDE Conditional Section\n");
6850 }
6851
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006852 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6853 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006854 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006855 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006856
6857 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6858 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006859 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006860 NEXT;
6861 } else if (RAW == '%') {
6862 xmlParsePEReference(ctxt);
6863 } else
6864 xmlParseMarkupDecl(ctxt);
6865
6866 /*
6867 * Pop-up of finished entities.
6868 */
6869 while ((RAW == 0) && (ctxt->inputNr > 1))
6870 xmlPopInput(ctxt);
6871
Daniel Veillardfdc91562002-07-01 21:52:03 +00006872 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006873 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006874 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006875 break;
6876 }
6877 }
6878 if (xmlParserDebugEntities) {
6879 if ((ctxt->input != NULL) && (ctxt->input->filename))
6880 xmlGenericError(xmlGenericErrorContext,
6881 "%s(%d): ", ctxt->input->filename,
6882 ctxt->input->line);
6883 xmlGenericError(xmlGenericErrorContext,
6884 "Leaving INCLUDE Conditional Section\n");
6885 }
6886
Daniel Veillarda07050d2003-10-19 14:46:32 +00006887 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006888 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006889 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006890 int depth = 0;
6891
6892 SKIP(6);
6893 SKIP_BLANKS;
6894 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006895 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006896 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006897 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006898 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006899 if (ctxt->input->id != id) {
6900 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6901 "All markup of the conditional section is not in the same entity\n",
6902 NULL, NULL);
6903 }
Owen Taylor3473f882001-02-23 17:55:21 +00006904 NEXT;
6905 }
6906 if (xmlParserDebugEntities) {
6907 if ((ctxt->input != NULL) && (ctxt->input->filename))
6908 xmlGenericError(xmlGenericErrorContext,
6909 "%s(%d): ", ctxt->input->filename,
6910 ctxt->input->line);
6911 xmlGenericError(xmlGenericErrorContext,
6912 "Entering IGNORE Conditional Section\n");
6913 }
6914
6915 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006916 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006917 * But disable SAX event generating DTD building in the meantime
6918 */
6919 state = ctxt->disableSAX;
6920 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006921 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006922 ctxt->instate = XML_PARSER_IGNORE;
6923
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006924 while (((depth >= 0) && (RAW != 0)) &&
6925 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006926 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6927 depth++;
6928 SKIP(3);
6929 continue;
6930 }
6931 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6932 if (--depth >= 0) SKIP(3);
6933 continue;
6934 }
6935 NEXT;
6936 continue;
6937 }
6938
6939 ctxt->disableSAX = state;
6940 ctxt->instate = instate;
6941
6942 if (xmlParserDebugEntities) {
6943 if ((ctxt->input != NULL) && (ctxt->input->filename))
6944 xmlGenericError(xmlGenericErrorContext,
6945 "%s(%d): ", ctxt->input->filename,
6946 ctxt->input->line);
6947 xmlGenericError(xmlGenericErrorContext,
6948 "Leaving IGNORE Conditional Section\n");
6949 }
6950
6951 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006952 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006953 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006954 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006955 }
6956
6957 if (RAW == 0)
6958 SHRINK;
6959
6960 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006961 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006962 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006963 if (ctxt->input->id != id) {
6964 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6965 "All markup of the conditional section is not in the same entity\n",
6966 NULL, NULL);
6967 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006968 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006969 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006970 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006971 }
6972}
6973
6974/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006975 * xmlParseMarkupDecl:
6976 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006977 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006978 * parse Markup declarations
6979 *
6980 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6981 * NotationDecl | PI | Comment
6982 *
6983 * [ VC: Proper Declaration/PE Nesting ]
6984 * Parameter-entity replacement text must be properly nested with
6985 * markup declarations. That is to say, if either the first character
6986 * or the last character of a markup declaration (markupdecl above) is
6987 * contained in the replacement text for a parameter-entity reference,
6988 * both must be contained in the same replacement text.
6989 *
6990 * [ WFC: PEs in Internal Subset ]
6991 * In the internal DTD subset, parameter-entity references can occur
6992 * only where markup declarations can occur, not within markup declarations.
6993 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006994 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006995 */
6996void
6997xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6998 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006999 if (CUR == '<') {
7000 if (NXT(1) == '!') {
7001 switch (NXT(2)) {
7002 case 'E':
7003 if (NXT(3) == 'L')
7004 xmlParseElementDecl(ctxt);
7005 else if (NXT(3) == 'N')
7006 xmlParseEntityDecl(ctxt);
7007 break;
7008 case 'A':
7009 xmlParseAttributeListDecl(ctxt);
7010 break;
7011 case 'N':
7012 xmlParseNotationDecl(ctxt);
7013 break;
7014 case '-':
7015 xmlParseComment(ctxt);
7016 break;
7017 default:
7018 /* there is an error but it will be detected later */
7019 break;
7020 }
7021 } else if (NXT(1) == '?') {
7022 xmlParsePI(ctxt);
7023 }
7024 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08007025
7026 /*
7027 * detect requirement to exit there and act accordingly
7028 * and avoid having instate overriden later on
7029 */
7030 if (ctxt->instate == XML_PARSER_EOF)
7031 return;
7032
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007033 /*
7034 * This is only for internal subset. On external entities,
7035 * the replacement is done before parsing stage
7036 */
7037 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7038 xmlParsePEReference(ctxt);
7039
7040 /*
7041 * Conditional sections are allowed from entities included
7042 * by PE References in the internal subset.
7043 */
7044 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7045 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7046 xmlParseConditionalSections(ctxt);
7047 }
7048 }
7049
7050 ctxt->instate = XML_PARSER_DTD;
7051}
7052
7053/**
7054 * xmlParseTextDecl:
7055 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00007056 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007057 * parse an XML declaration header for external entities
7058 *
7059 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007060 */
7061
7062void
7063xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7064 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007065 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007066
7067 /*
7068 * We know that '<?xml' is here.
7069 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007070 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007071 SKIP(5);
7072 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007073 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007074 return;
7075 }
7076
William M. Brack76e95df2003-10-18 16:20:14 +00007077 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007078 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7079 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007080 }
7081 SKIP_BLANKS;
7082
7083 /*
7084 * We may have the VersionInfo here.
7085 */
7086 version = xmlParseVersionInfo(ctxt);
7087 if (version == NULL)
7088 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00007089 else {
William M. Brack76e95df2003-10-18 16:20:14 +00007090 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7092 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00007093 }
7094 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007095 ctxt->input->version = version;
7096
7097 /*
7098 * We must have the encoding declaration
7099 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007100 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007101 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7102 /*
7103 * The XML REC instructs us to stop parsing right here
7104 */
7105 return;
7106 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007107 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7108 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7109 "Missing encoding in text declaration\n");
7110 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007111
7112 SKIP_BLANKS;
7113 if ((RAW == '?') && (NXT(1) == '>')) {
7114 SKIP(2);
7115 } else if (RAW == '>') {
7116 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007117 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007118 NEXT;
7119 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007120 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007121 MOVETO_ENDTAG(CUR_PTR);
7122 NEXT;
7123 }
7124}
7125
7126/**
Owen Taylor3473f882001-02-23 17:55:21 +00007127 * xmlParseExternalSubset:
7128 * @ctxt: an XML parser context
7129 * @ExternalID: the external identifier
7130 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007131 *
Owen Taylor3473f882001-02-23 17:55:21 +00007132 * parse Markup declarations from an external subset
7133 *
7134 * [30] extSubset ::= textDecl? extSubsetDecl
7135 *
7136 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7137 */
7138void
7139xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7140 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007141 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007142 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007143
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007144 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007145 (ctxt->input->end - ctxt->input->cur >= 4)) {
7146 xmlChar start[4];
7147 xmlCharEncoding enc;
7148
7149 start[0] = RAW;
7150 start[1] = NXT(1);
7151 start[2] = NXT(2);
7152 start[3] = NXT(3);
7153 enc = xmlDetectCharEncoding(start, 4);
7154 if (enc != XML_CHAR_ENCODING_NONE)
7155 xmlSwitchEncoding(ctxt, enc);
7156 }
7157
Daniel Veillarda07050d2003-10-19 14:46:32 +00007158 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007159 xmlParseTextDecl(ctxt);
7160 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7161 /*
7162 * The XML REC instructs us to stop parsing right here
7163 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007164 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007165 return;
7166 }
7167 }
7168 if (ctxt->myDoc == NULL) {
7169 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007170 if (ctxt->myDoc == NULL) {
7171 xmlErrMemory(ctxt, "New Doc failed");
7172 return;
7173 }
7174 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007175 }
7176 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7177 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7178
7179 ctxt->instate = XML_PARSER_DTD;
7180 ctxt->external = 1;
7181 while (((RAW == '<') && (NXT(1) == '?')) ||
7182 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007183 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007184 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007185 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007186
7187 GROW;
7188 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7189 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007190 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007191 NEXT;
7192 } else if (RAW == '%') {
7193 xmlParsePEReference(ctxt);
7194 } else
7195 xmlParseMarkupDecl(ctxt);
7196
7197 /*
7198 * Pop-up of finished entities.
7199 */
7200 while ((RAW == 0) && (ctxt->inputNr > 1))
7201 xmlPopInput(ctxt);
7202
Daniel Veillardfdc91562002-07-01 21:52:03 +00007203 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007204 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007205 break;
7206 }
7207 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007208
Owen Taylor3473f882001-02-23 17:55:21 +00007209 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007210 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007211 }
7212
7213}
7214
7215/**
7216 * xmlParseReference:
7217 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007218 *
Owen Taylor3473f882001-02-23 17:55:21 +00007219 * parse and handle entity references in content, depending on the SAX
7220 * interface, this may end-up in a call to character() if this is a
7221 * CharRef, a predefined entity, if there is no reference() callback.
7222 * or if the parser was asked to switch to that mode.
7223 *
7224 * [67] Reference ::= EntityRef | CharRef
7225 */
7226void
7227xmlParseReference(xmlParserCtxtPtr ctxt) {
7228 xmlEntityPtr ent;
7229 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007230 int was_checked;
7231 xmlNodePtr list = NULL;
7232 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007233
Daniel Veillard0161e632008-08-28 15:36:32 +00007234
7235 if (RAW != '&')
7236 return;
7237
7238 /*
7239 * Simple case of a CharRef
7240 */
Owen Taylor3473f882001-02-23 17:55:21 +00007241 if (NXT(1) == '#') {
7242 int i = 0;
7243 xmlChar out[10];
7244 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007245 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007246
Daniel Veillarddc171602008-03-26 17:41:38 +00007247 if (value == 0)
7248 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007249 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7250 /*
7251 * So we are using non-UTF-8 buffers
7252 * Check that the char fit on 8bits, if not
7253 * generate a CharRef.
7254 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007255 if (value <= 0xFF) {
7256 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007257 out[1] = 0;
7258 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7259 (!ctxt->disableSAX))
7260 ctxt->sax->characters(ctxt->userData, out, 1);
7261 } else {
7262 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007263 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007264 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007265 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007266 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7267 (!ctxt->disableSAX))
7268 ctxt->sax->reference(ctxt->userData, out);
7269 }
7270 } else {
7271 /*
7272 * Just encode the value in UTF-8
7273 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007274 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007275 out[i] = 0;
7276 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7277 (!ctxt->disableSAX))
7278 ctxt->sax->characters(ctxt->userData, out, i);
7279 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007280 return;
7281 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007282
Daniel Veillard0161e632008-08-28 15:36:32 +00007283 /*
7284 * We are seeing an entity reference
7285 */
7286 ent = xmlParseEntityRef(ctxt);
7287 if (ent == NULL) return;
7288 if (!ctxt->wellFormed)
7289 return;
7290 was_checked = ent->checked;
7291
7292 /* special case of predefined entities */
7293 if ((ent->name == NULL) ||
7294 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7295 val = ent->content;
7296 if (val == NULL) return;
7297 /*
7298 * inline the entity.
7299 */
7300 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7301 (!ctxt->disableSAX))
7302 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7303 return;
7304 }
7305
7306 /*
7307 * The first reference to the entity trigger a parsing phase
7308 * where the ent->children is filled with the result from
7309 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007310 * Note: external parsed entities will not be loaded, it is not
7311 * required for a non-validating parser, unless the parsing option
7312 * of validating, or substituting entities were given. Doing so is
7313 * far more secure as the parser will only process data coming from
7314 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007315 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007316 if (((ent->checked == 0) ||
7317 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007318 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7319 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007320 unsigned long oldnbent = ctxt->nbentities;
7321
7322 /*
7323 * This is a bit hackish but this seems the best
7324 * way to make sure both SAX and DOM entity support
7325 * behaves okay.
7326 */
7327 void *user_data;
7328 if (ctxt->userData == ctxt)
7329 user_data = NULL;
7330 else
7331 user_data = ctxt->userData;
7332
7333 /*
7334 * Check that this entity is well formed
7335 * 4.3.2: An internal general parsed entity is well-formed
7336 * if its replacement text matches the production labeled
7337 * content.
7338 */
7339 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7340 ctxt->depth++;
7341 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7342 user_data, &list);
7343 ctxt->depth--;
7344
7345 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7346 ctxt->depth++;
7347 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7348 user_data, ctxt->depth, ent->URI,
7349 ent->ExternalID, &list);
7350 ctxt->depth--;
7351 } else {
7352 ret = XML_ERR_ENTITY_PE_INTERNAL;
7353 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7354 "invalid entity type found\n", NULL);
7355 }
7356
7357 /*
7358 * Store the number of entities needing parsing for this entity
7359 * content and do checkings
7360 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007361 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7362 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7363 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007364 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007365 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007366 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007367 return;
7368 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007369 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007370 xmlFreeNodeList(list);
7371 return;
7372 }
Owen Taylor3473f882001-02-23 17:55:21 +00007373
Daniel Veillard0161e632008-08-28 15:36:32 +00007374 if ((ret == XML_ERR_OK) && (list != NULL)) {
7375 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7376 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7377 (ent->children == NULL)) {
7378 ent->children = list;
7379 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007380 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007381 * Prune it directly in the generated document
7382 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007383 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007384 if (((list->type == XML_TEXT_NODE) &&
7385 (list->next == NULL)) ||
7386 (ctxt->parseMode == XML_PARSE_READER)) {
7387 list->parent = (xmlNodePtr) ent;
7388 list = NULL;
7389 ent->owner = 1;
7390 } else {
7391 ent->owner = 0;
7392 while (list != NULL) {
7393 list->parent = (xmlNodePtr) ctxt->node;
7394 list->doc = ctxt->myDoc;
7395 if (list->next == NULL)
7396 ent->last = list;
7397 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007398 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007399 list = ent->children;
7400#ifdef LIBXML_LEGACY_ENABLED
7401 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7402 xmlAddEntityReference(ent, list, NULL);
7403#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007404 }
7405 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007406 ent->owner = 1;
7407 while (list != NULL) {
7408 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007409 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007410 if (list->next == NULL)
7411 ent->last = list;
7412 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007413 }
7414 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007415 } else {
7416 xmlFreeNodeList(list);
7417 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007418 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007419 } else if ((ret != XML_ERR_OK) &&
7420 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7421 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7422 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007423 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007424 } else if (list != NULL) {
7425 xmlFreeNodeList(list);
7426 list = NULL;
7427 }
7428 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007429 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007430 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007431 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007432 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007433
Daniel Veillard0161e632008-08-28 15:36:32 +00007434 /*
7435 * Now that the entity content has been gathered
7436 * provide it to the application, this can take different forms based
7437 * on the parsing modes.
7438 */
7439 if (ent->children == NULL) {
7440 /*
7441 * Probably running in SAX mode and the callbacks don't
7442 * build the entity content. So unless we already went
7443 * though parsing for first checking go though the entity
7444 * content to generate callbacks associated to the entity
7445 */
7446 if (was_checked != 0) {
7447 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007448 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007449 * This is a bit hackish but this seems the best
7450 * way to make sure both SAX and DOM entity support
7451 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007452 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007453 if (ctxt->userData == ctxt)
7454 user_data = NULL;
7455 else
7456 user_data = ctxt->userData;
7457
7458 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7459 ctxt->depth++;
7460 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7461 ent->content, user_data, NULL);
7462 ctxt->depth--;
7463 } else if (ent->etype ==
7464 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7465 ctxt->depth++;
7466 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7467 ctxt->sax, user_data, ctxt->depth,
7468 ent->URI, ent->ExternalID, NULL);
7469 ctxt->depth--;
7470 } else {
7471 ret = XML_ERR_ENTITY_PE_INTERNAL;
7472 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7473 "invalid entity type found\n", NULL);
7474 }
7475 if (ret == XML_ERR_ENTITY_LOOP) {
7476 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7477 return;
7478 }
7479 }
7480 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7481 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7482 /*
7483 * Entity reference callback comes second, it's somewhat
7484 * superfluous but a compatibility to historical behaviour
7485 */
7486 ctxt->sax->reference(ctxt->userData, ent->name);
7487 }
7488 return;
7489 }
7490
7491 /*
7492 * If we didn't get any children for the entity being built
7493 */
7494 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7495 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7496 /*
7497 * Create a node.
7498 */
7499 ctxt->sax->reference(ctxt->userData, ent->name);
7500 return;
7501 }
7502
7503 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7504 /*
7505 * There is a problem on the handling of _private for entities
7506 * (bug 155816): Should we copy the content of the field from
7507 * the entity (possibly overwriting some value set by the user
7508 * when a copy is created), should we leave it alone, or should
7509 * we try to take care of different situations? The problem
7510 * is exacerbated by the usage of this field by the xmlReader.
7511 * To fix this bug, we look at _private on the created node
7512 * and, if it's NULL, we copy in whatever was in the entity.
7513 * If it's not NULL we leave it alone. This is somewhat of a
7514 * hack - maybe we should have further tests to determine
7515 * what to do.
7516 */
7517 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7518 /*
7519 * Seems we are generating the DOM content, do
7520 * a simple tree copy for all references except the first
7521 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007522 */
7523 if (((list == NULL) && (ent->owner == 0)) ||
7524 (ctxt->parseMode == XML_PARSE_READER)) {
7525 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7526
7527 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007528 * We are copying here, make sure there is no abuse
7529 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007530 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007531 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7532 return;
7533
7534 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007535 * when operating on a reader, the entities definitions
7536 * are always owning the entities subtree.
7537 if (ctxt->parseMode == XML_PARSE_READER)
7538 ent->owner = 1;
7539 */
7540
7541 cur = ent->children;
7542 while (cur != NULL) {
7543 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7544 if (nw != NULL) {
7545 if (nw->_private == NULL)
7546 nw->_private = cur->_private;
7547 if (firstChild == NULL){
7548 firstChild = nw;
7549 }
7550 nw = xmlAddChild(ctxt->node, nw);
7551 }
7552 if (cur == ent->last) {
7553 /*
7554 * needed to detect some strange empty
7555 * node cases in the reader tests
7556 */
7557 if ((ctxt->parseMode == XML_PARSE_READER) &&
7558 (nw != NULL) &&
7559 (nw->type == XML_ELEMENT_NODE) &&
7560 (nw->children == NULL))
7561 nw->extra = 1;
7562
7563 break;
7564 }
7565 cur = cur->next;
7566 }
7567#ifdef LIBXML_LEGACY_ENABLED
7568 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7569 xmlAddEntityReference(ent, firstChild, nw);
7570#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007571 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007572 xmlNodePtr nw = NULL, cur, next, last,
7573 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007574
7575 /*
7576 * We are copying here, make sure there is no abuse
7577 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007578 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007579 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7580 return;
7581
Daniel Veillard0161e632008-08-28 15:36:32 +00007582 /*
7583 * Copy the entity child list and make it the new
7584 * entity child list. The goal is to make sure any
7585 * ID or REF referenced will be the one from the
7586 * document content and not the entity copy.
7587 */
7588 cur = ent->children;
7589 ent->children = NULL;
7590 last = ent->last;
7591 ent->last = NULL;
7592 while (cur != NULL) {
7593 next = cur->next;
7594 cur->next = NULL;
7595 cur->parent = NULL;
7596 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7597 if (nw != NULL) {
7598 if (nw->_private == NULL)
7599 nw->_private = cur->_private;
7600 if (firstChild == NULL){
7601 firstChild = cur;
7602 }
7603 xmlAddChild((xmlNodePtr) ent, nw);
7604 xmlAddChild(ctxt->node, cur);
7605 }
7606 if (cur == last)
7607 break;
7608 cur = next;
7609 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007610 if (ent->owner == 0)
7611 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007612#ifdef LIBXML_LEGACY_ENABLED
7613 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7614 xmlAddEntityReference(ent, firstChild, nw);
7615#endif /* LIBXML_LEGACY_ENABLED */
7616 } else {
7617 const xmlChar *nbktext;
7618
7619 /*
7620 * the name change is to avoid coalescing of the
7621 * node with a possible previous text one which
7622 * would make ent->children a dangling pointer
7623 */
7624 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7625 -1);
7626 if (ent->children->type == XML_TEXT_NODE)
7627 ent->children->name = nbktext;
7628 if ((ent->last != ent->children) &&
7629 (ent->last->type == XML_TEXT_NODE))
7630 ent->last->name = nbktext;
7631 xmlAddChildList(ctxt->node, ent->children);
7632 }
7633
7634 /*
7635 * This is to avoid a nasty side effect, see
7636 * characters() in SAX.c
7637 */
7638 ctxt->nodemem = 0;
7639 ctxt->nodelen = 0;
7640 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007641 }
7642 }
7643}
7644
7645/**
7646 * xmlParseEntityRef:
7647 * @ctxt: an XML parser context
7648 *
7649 * parse ENTITY references declarations
7650 *
7651 * [68] EntityRef ::= '&' Name ';'
7652 *
7653 * [ WFC: Entity Declared ]
7654 * In a document without any DTD, a document with only an internal DTD
7655 * subset which contains no parameter entity references, or a document
7656 * with "standalone='yes'", the Name given in the entity reference
7657 * must match that in an entity declaration, except that well-formed
7658 * documents need not declare any of the following entities: amp, lt,
7659 * gt, apos, quot. The declaration of a parameter entity must precede
7660 * any reference to it. Similarly, the declaration of a general entity
7661 * must precede any reference to it which appears in a default value in an
7662 * attribute-list declaration. Note that if entities are declared in the
7663 * external subset or in external parameter entities, a non-validating
7664 * processor is not obligated to read and process their declarations;
7665 * for such documents, the rule that an entity must be declared is a
7666 * well-formedness constraint only if standalone='yes'.
7667 *
7668 * [ WFC: Parsed Entity ]
7669 * An entity reference must not contain the name of an unparsed entity
7670 *
7671 * Returns the xmlEntityPtr if found, or NULL otherwise.
7672 */
7673xmlEntityPtr
7674xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007675 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007676 xmlEntityPtr ent = NULL;
7677
7678 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007679 if (ctxt->instate == XML_PARSER_EOF)
7680 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007681
Daniel Veillard0161e632008-08-28 15:36:32 +00007682 if (RAW != '&')
7683 return(NULL);
7684 NEXT;
7685 name = xmlParseName(ctxt);
7686 if (name == NULL) {
7687 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7688 "xmlParseEntityRef: no name\n");
7689 return(NULL);
7690 }
7691 if (RAW != ';') {
7692 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7693 return(NULL);
7694 }
7695 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007696
Daniel Veillard0161e632008-08-28 15:36:32 +00007697 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007698 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007699 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007700 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7701 ent = xmlGetPredefinedEntity(name);
7702 if (ent != NULL)
7703 return(ent);
7704 }
Owen Taylor3473f882001-02-23 17:55:21 +00007705
Daniel Veillard0161e632008-08-28 15:36:32 +00007706 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007707 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007708 */
7709 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007710
Daniel Veillard0161e632008-08-28 15:36:32 +00007711 /*
7712 * Ask first SAX for entity resolution, otherwise try the
7713 * entities which may have stored in the parser context.
7714 */
7715 if (ctxt->sax != NULL) {
7716 if (ctxt->sax->getEntity != NULL)
7717 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007718 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007719 (ctxt->options & XML_PARSE_OLDSAX))
7720 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007721 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7722 (ctxt->userData==ctxt)) {
7723 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007724 }
7725 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007726 if (ctxt->instate == XML_PARSER_EOF)
7727 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007728 /*
7729 * [ WFC: Entity Declared ]
7730 * In a document without any DTD, a document with only an
7731 * internal DTD subset which contains no parameter entity
7732 * references, or a document with "standalone='yes'", the
7733 * Name given in the entity reference must match that in an
7734 * entity declaration, except that well-formed documents
7735 * need not declare any of the following entities: amp, lt,
7736 * gt, apos, quot.
7737 * The declaration of a parameter entity must precede any
7738 * reference to it.
7739 * Similarly, the declaration of a general entity must
7740 * precede any reference to it which appears in a default
7741 * value in an attribute-list declaration. Note that if
7742 * entities are declared in the external subset or in
7743 * external parameter entities, a non-validating processor
7744 * is not obligated to read and process their declarations;
7745 * for such documents, the rule that an entity must be
7746 * declared is a well-formedness constraint only if
7747 * standalone='yes'.
7748 */
7749 if (ent == NULL) {
7750 if ((ctxt->standalone == 1) ||
7751 ((ctxt->hasExternalSubset == 0) &&
7752 (ctxt->hasPErefs == 0))) {
7753 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7754 "Entity '%s' not defined\n", name);
7755 } else {
7756 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7757 "Entity '%s' not defined\n", name);
7758 if ((ctxt->inSubset == 0) &&
7759 (ctxt->sax != NULL) &&
7760 (ctxt->sax->reference != NULL)) {
7761 ctxt->sax->reference(ctxt->userData, name);
7762 }
7763 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007764 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007765 ctxt->valid = 0;
7766 }
7767
7768 /*
7769 * [ WFC: Parsed Entity ]
7770 * An entity reference must not contain the name of an
7771 * unparsed entity
7772 */
7773 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7774 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7775 "Entity reference to unparsed entity %s\n", name);
7776 }
7777
7778 /*
7779 * [ WFC: No External Entity References ]
7780 * Attribute values cannot contain direct or indirect
7781 * entity references to external entities.
7782 */
7783 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7784 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7785 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7786 "Attribute references external entity '%s'\n", name);
7787 }
7788 /*
7789 * [ WFC: No < in Attribute Values ]
7790 * The replacement text of any entity referred to directly or
7791 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007792 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007793 */
7794 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007795 (ent != NULL) &&
7796 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007797 if (((ent->checked & 1) || (ent->checked == 0)) &&
7798 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007799 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7800 "'<' in entity '%s' is not allowed in attributes values\n", name);
7801 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007802 }
7803
7804 /*
7805 * Internal check, no parameter entities here ...
7806 */
7807 else {
7808 switch (ent->etype) {
7809 case XML_INTERNAL_PARAMETER_ENTITY:
7810 case XML_EXTERNAL_PARAMETER_ENTITY:
7811 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7812 "Attempt to reference the parameter entity '%s'\n",
7813 name);
7814 break;
7815 default:
7816 break;
7817 }
7818 }
7819
7820 /*
7821 * [ WFC: No Recursion ]
7822 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007823 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007824 * Done somewhere else
7825 */
Owen Taylor3473f882001-02-23 17:55:21 +00007826 return(ent);
7827}
7828
7829/**
7830 * xmlParseStringEntityRef:
7831 * @ctxt: an XML parser context
7832 * @str: a pointer to an index in the string
7833 *
7834 * parse ENTITY references declarations, but this version parses it from
7835 * a string value.
7836 *
7837 * [68] EntityRef ::= '&' Name ';'
7838 *
7839 * [ WFC: Entity Declared ]
7840 * In a document without any DTD, a document with only an internal DTD
7841 * subset which contains no parameter entity references, or a document
7842 * with "standalone='yes'", the Name given in the entity reference
7843 * must match that in an entity declaration, except that well-formed
7844 * documents need not declare any of the following entities: amp, lt,
7845 * gt, apos, quot. The declaration of a parameter entity must precede
7846 * any reference to it. Similarly, the declaration of a general entity
7847 * must precede any reference to it which appears in a default value in an
7848 * attribute-list declaration. Note that if entities are declared in the
7849 * external subset or in external parameter entities, a non-validating
7850 * processor is not obligated to read and process their declarations;
7851 * for such documents, the rule that an entity must be declared is a
7852 * well-formedness constraint only if standalone='yes'.
7853 *
7854 * [ WFC: Parsed Entity ]
7855 * An entity reference must not contain the name of an unparsed entity
7856 *
7857 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7858 * is updated to the current location in the string.
7859 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007860static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007861xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7862 xmlChar *name;
7863 const xmlChar *ptr;
7864 xmlChar cur;
7865 xmlEntityPtr ent = NULL;
7866
7867 if ((str == NULL) || (*str == NULL))
7868 return(NULL);
7869 ptr = *str;
7870 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007871 if (cur != '&')
7872 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007873
Daniel Veillard0161e632008-08-28 15:36:32 +00007874 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007875 name = xmlParseStringName(ctxt, &ptr);
7876 if (name == NULL) {
7877 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7878 "xmlParseStringEntityRef: no name\n");
7879 *str = ptr;
7880 return(NULL);
7881 }
7882 if (*ptr != ';') {
7883 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007884 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007885 *str = ptr;
7886 return(NULL);
7887 }
7888 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007889
Owen Taylor3473f882001-02-23 17:55:21 +00007890
Daniel Veillard0161e632008-08-28 15:36:32 +00007891 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007892 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007893 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007894 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7895 ent = xmlGetPredefinedEntity(name);
7896 if (ent != NULL) {
7897 xmlFree(name);
7898 *str = ptr;
7899 return(ent);
7900 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007901 }
Owen Taylor3473f882001-02-23 17:55:21 +00007902
Daniel Veillard0161e632008-08-28 15:36:32 +00007903 /*
7904 * Increate the number of entity references parsed
7905 */
7906 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007907
Daniel Veillard0161e632008-08-28 15:36:32 +00007908 /*
7909 * Ask first SAX for entity resolution, otherwise try the
7910 * entities which may have stored in the parser context.
7911 */
7912 if (ctxt->sax != NULL) {
7913 if (ctxt->sax->getEntity != NULL)
7914 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007915 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7916 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007917 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7918 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007919 }
7920 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007921 if (ctxt->instate == XML_PARSER_EOF) {
7922 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007923 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007924 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007925
7926 /*
7927 * [ WFC: Entity Declared ]
7928 * In a document without any DTD, a document with only an
7929 * internal DTD subset which contains no parameter entity
7930 * references, or a document with "standalone='yes'", the
7931 * Name given in the entity reference must match that in an
7932 * entity declaration, except that well-formed documents
7933 * need not declare any of the following entities: amp, lt,
7934 * gt, apos, quot.
7935 * The declaration of a parameter entity must precede any
7936 * reference to it.
7937 * Similarly, the declaration of a general entity must
7938 * precede any reference to it which appears in a default
7939 * value in an attribute-list declaration. Note that if
7940 * entities are declared in the external subset or in
7941 * external parameter entities, a non-validating processor
7942 * is not obligated to read and process their declarations;
7943 * for such documents, the rule that an entity must be
7944 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007945 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007946 */
7947 if (ent == NULL) {
7948 if ((ctxt->standalone == 1) ||
7949 ((ctxt->hasExternalSubset == 0) &&
7950 (ctxt->hasPErefs == 0))) {
7951 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7952 "Entity '%s' not defined\n", name);
7953 } else {
7954 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7955 "Entity '%s' not defined\n",
7956 name);
7957 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007958 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007959 /* TODO ? check regressions ctxt->valid = 0; */
7960 }
7961
7962 /*
7963 * [ WFC: Parsed Entity ]
7964 * An entity reference must not contain the name of an
7965 * unparsed entity
7966 */
7967 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7968 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7969 "Entity reference to unparsed entity %s\n", name);
7970 }
7971
7972 /*
7973 * [ WFC: No External Entity References ]
7974 * Attribute values cannot contain direct or indirect
7975 * entity references to external entities.
7976 */
7977 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7978 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7979 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7980 "Attribute references external entity '%s'\n", name);
7981 }
7982 /*
7983 * [ WFC: No < in Attribute Values ]
7984 * The replacement text of any entity referred to directly or
7985 * indirectly in an attribute value (other than "&lt;") must
7986 * not contain a <.
7987 */
7988 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7989 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007990 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007991 (xmlStrchr(ent->content, '<'))) {
7992 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7993 "'<' in entity '%s' is not allowed in attributes values\n",
7994 name);
7995 }
7996
7997 /*
7998 * Internal check, no parameter entities here ...
7999 */
8000 else {
8001 switch (ent->etype) {
8002 case XML_INTERNAL_PARAMETER_ENTITY:
8003 case XML_EXTERNAL_PARAMETER_ENTITY:
8004 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
8005 "Attempt to reference the parameter entity '%s'\n",
8006 name);
8007 break;
8008 default:
8009 break;
8010 }
8011 }
8012
8013 /*
8014 * [ WFC: No Recursion ]
8015 * A parsed entity must not contain a recursive reference
8016 * to itself, either directly or indirectly.
8017 * Done somewhere else
8018 */
8019
8020 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008021 *str = ptr;
8022 return(ent);
8023}
8024
8025/**
8026 * xmlParsePEReference:
8027 * @ctxt: an XML parser context
8028 *
8029 * parse PEReference declarations
8030 * The entity content is handled directly by pushing it's content as
8031 * a new input stream.
8032 *
8033 * [69] PEReference ::= '%' Name ';'
8034 *
8035 * [ WFC: No Recursion ]
8036 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008037 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008038 *
8039 * [ WFC: Entity Declared ]
8040 * In a document without any DTD, a document with only an internal DTD
8041 * subset which contains no parameter entity references, or a document
8042 * with "standalone='yes'", ... ... The declaration of a parameter
8043 * entity must precede any reference to it...
8044 *
8045 * [ VC: Entity Declared ]
8046 * In a document with an external subset or external parameter entities
8047 * with "standalone='no'", ... ... The declaration of a parameter entity
8048 * must precede any reference to it...
8049 *
8050 * [ WFC: In DTD ]
8051 * Parameter-entity references may only appear in the DTD.
8052 * NOTE: misleading but this is handled.
8053 */
8054void
Daniel Veillard8f597c32003-10-06 08:19:27 +00008055xmlParsePEReference(xmlParserCtxtPtr ctxt)
8056{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008057 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008058 xmlEntityPtr entity = NULL;
8059 xmlParserInputPtr input;
8060
Daniel Veillard0161e632008-08-28 15:36:32 +00008061 if (RAW != '%')
8062 return;
8063 NEXT;
8064 name = xmlParseName(ctxt);
8065 if (name == NULL) {
8066 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8067 "xmlParsePEReference: no name\n");
8068 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008069 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008070 if (RAW != ';') {
8071 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8072 return;
8073 }
8074
8075 NEXT;
8076
8077 /*
8078 * Increate the number of entity references parsed
8079 */
8080 ctxt->nbentities++;
8081
8082 /*
8083 * Request the entity from SAX
8084 */
8085 if ((ctxt->sax != NULL) &&
8086 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008087 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8088 if (ctxt->instate == XML_PARSER_EOF)
8089 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00008090 if (entity == NULL) {
8091 /*
8092 * [ WFC: Entity Declared ]
8093 * In a document without any DTD, a document with only an
8094 * internal DTD subset which contains no parameter entity
8095 * references, or a document with "standalone='yes'", ...
8096 * ... The declaration of a parameter entity must precede
8097 * any reference to it...
8098 */
8099 if ((ctxt->standalone == 1) ||
8100 ((ctxt->hasExternalSubset == 0) &&
8101 (ctxt->hasPErefs == 0))) {
8102 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8103 "PEReference: %%%s; not found\n",
8104 name);
8105 } else {
8106 /*
8107 * [ VC: Entity Declared ]
8108 * In a document with an external subset or external
8109 * parameter entities with "standalone='no'", ...
8110 * ... The declaration of a parameter entity must
8111 * precede any reference to it...
8112 */
8113 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8114 "PEReference: %%%s; not found\n",
8115 name, NULL);
8116 ctxt->valid = 0;
8117 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008118 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008119 } else {
8120 /*
8121 * Internal checking in case the entity quest barfed
8122 */
8123 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8124 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8125 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8126 "Internal: %%%s; is not a parameter entity\n",
8127 name, NULL);
8128 } else if (ctxt->input->free != deallocblankswrapper) {
8129 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8130 if (xmlPushInput(ctxt, input) < 0)
8131 return;
8132 } else {
Brian C. Young4e160612017-04-05 09:47:34 -07008133 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8134 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8135 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8136 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8137 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8138 (ctxt->replaceEntities == 0) &&
8139 (ctxt->validate == 0))
8140 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00008141 /*
8142 * TODO !!!
8143 * handle the extra spaces added before and after
8144 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8145 */
8146 input = xmlNewEntityInputStream(ctxt, entity);
8147 if (xmlPushInput(ctxt, input) < 0)
8148 return;
8149 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8150 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8151 (IS_BLANK_CH(NXT(5)))) {
8152 xmlParseTextDecl(ctxt);
8153 if (ctxt->errNo ==
8154 XML_ERR_UNSUPPORTED_ENCODING) {
8155 /*
8156 * The XML REC instructs us to stop parsing
8157 * right here
8158 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08008159 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00008160 return;
8161 }
8162 }
8163 }
8164 }
8165 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008166}
8167
8168/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008169 * xmlLoadEntityContent:
8170 * @ctxt: an XML parser context
8171 * @entity: an unloaded system entity
8172 *
8173 * Load the original content of the given system entity from the
8174 * ExternalID/SystemID given. This is to be used for Included in Literal
8175 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8176 *
8177 * Returns 0 in case of success and -1 in case of failure
8178 */
8179static int
8180xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8181 xmlParserInputPtr input;
8182 xmlBufferPtr buf;
8183 int l, c;
8184 int count = 0;
8185
8186 if ((ctxt == NULL) || (entity == NULL) ||
8187 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8188 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8189 (entity->content != NULL)) {
8190 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8191 "xmlLoadEntityContent parameter error");
8192 return(-1);
8193 }
8194
8195 if (xmlParserDebugEntities)
8196 xmlGenericError(xmlGenericErrorContext,
8197 "Reading %s entity content input\n", entity->name);
8198
8199 buf = xmlBufferCreate();
8200 if (buf == NULL) {
8201 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8202 "xmlLoadEntityContent parameter error");
8203 return(-1);
8204 }
8205
8206 input = xmlNewEntityInputStream(ctxt, entity);
8207 if (input == NULL) {
8208 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8209 "xmlLoadEntityContent input error");
8210 xmlBufferFree(buf);
8211 return(-1);
8212 }
8213
8214 /*
8215 * Push the entity as the current input, read char by char
8216 * saving to the buffer until the end of the entity or an error
8217 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008218 if (xmlPushInput(ctxt, input) < 0) {
8219 xmlBufferFree(buf);
8220 return(-1);
8221 }
8222
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008223 GROW;
8224 c = CUR_CHAR(l);
8225 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8226 (IS_CHAR(c))) {
8227 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008228 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008229 count = 0;
8230 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008231 if (ctxt->instate == XML_PARSER_EOF) {
8232 xmlBufferFree(buf);
8233 return(-1);
8234 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008235 }
8236 NEXTL(l);
8237 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008238 if (c == 0) {
8239 count = 0;
8240 GROW;
8241 if (ctxt->instate == XML_PARSER_EOF) {
8242 xmlBufferFree(buf);
8243 return(-1);
8244 }
8245 c = CUR_CHAR(l);
8246 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008247 }
8248
8249 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8250 xmlPopInput(ctxt);
8251 } else if (!IS_CHAR(c)) {
8252 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8253 "xmlLoadEntityContent: invalid char value %d\n",
8254 c);
8255 xmlBufferFree(buf);
8256 return(-1);
8257 }
8258 entity->content = buf->content;
8259 buf->content = NULL;
8260 xmlBufferFree(buf);
8261
8262 return(0);
8263}
8264
8265/**
Owen Taylor3473f882001-02-23 17:55:21 +00008266 * xmlParseStringPEReference:
8267 * @ctxt: an XML parser context
8268 * @str: a pointer to an index in the string
8269 *
8270 * parse PEReference declarations
8271 *
8272 * [69] PEReference ::= '%' Name ';'
8273 *
8274 * [ WFC: No Recursion ]
8275 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008276 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008277 *
8278 * [ WFC: Entity Declared ]
8279 * In a document without any DTD, a document with only an internal DTD
8280 * subset which contains no parameter entity references, or a document
8281 * with "standalone='yes'", ... ... The declaration of a parameter
8282 * entity must precede any reference to it...
8283 *
8284 * [ VC: Entity Declared ]
8285 * In a document with an external subset or external parameter entities
8286 * with "standalone='no'", ... ... The declaration of a parameter entity
8287 * must precede any reference to it...
8288 *
8289 * [ WFC: In DTD ]
8290 * Parameter-entity references may only appear in the DTD.
8291 * NOTE: misleading but this is handled.
8292 *
8293 * Returns the string of the entity content.
8294 * str is updated to the current value of the index
8295 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008296static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008297xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8298 const xmlChar *ptr;
8299 xmlChar cur;
8300 xmlChar *name;
8301 xmlEntityPtr entity = NULL;
8302
8303 if ((str == NULL) || (*str == NULL)) return(NULL);
8304 ptr = *str;
8305 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008306 if (cur != '%')
8307 return(NULL);
8308 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008309 name = xmlParseStringName(ctxt, &ptr);
8310 if (name == NULL) {
8311 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8312 "xmlParseStringPEReference: no name\n");
8313 *str = ptr;
8314 return(NULL);
8315 }
8316 cur = *ptr;
8317 if (cur != ';') {
8318 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8319 xmlFree(name);
8320 *str = ptr;
8321 return(NULL);
8322 }
8323 ptr++;
8324
8325 /*
8326 * Increate the number of entity references parsed
8327 */
8328 ctxt->nbentities++;
8329
8330 /*
8331 * Request the entity from SAX
8332 */
8333 if ((ctxt->sax != NULL) &&
8334 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008335 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8336 if (ctxt->instate == XML_PARSER_EOF) {
8337 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008338 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008339 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008340 if (entity == NULL) {
8341 /*
8342 * [ WFC: Entity Declared ]
8343 * In a document without any DTD, a document with only an
8344 * internal DTD subset which contains no parameter entity
8345 * references, or a document with "standalone='yes'", ...
8346 * ... The declaration of a parameter entity must precede
8347 * any reference to it...
8348 */
8349 if ((ctxt->standalone == 1) ||
8350 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8351 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8352 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008353 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008354 /*
8355 * [ VC: Entity Declared ]
8356 * In a document with an external subset or external
8357 * parameter entities with "standalone='no'", ...
8358 * ... The declaration of a parameter entity must
8359 * precede any reference to it...
8360 */
8361 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8362 "PEReference: %%%s; not found\n",
8363 name, NULL);
8364 ctxt->valid = 0;
8365 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008366 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008367 } else {
8368 /*
8369 * Internal checking in case the entity quest barfed
8370 */
8371 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8372 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8373 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8374 "%%%s; is not a parameter entity\n",
8375 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008376 }
8377 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008378 ctxt->hasPErefs = 1;
8379 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008380 *str = ptr;
8381 return(entity);
8382}
8383
8384/**
8385 * xmlParseDocTypeDecl:
8386 * @ctxt: an XML parser context
8387 *
8388 * parse a DOCTYPE declaration
8389 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008390 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008391 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8392 *
8393 * [ VC: Root Element Type ]
8394 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008395 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008396 */
8397
8398void
8399xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008400 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008401 xmlChar *ExternalID = NULL;
8402 xmlChar *URI = NULL;
8403
8404 /*
8405 * We know that '<!DOCTYPE' has been detected.
8406 */
8407 SKIP(9);
8408
8409 SKIP_BLANKS;
8410
8411 /*
8412 * Parse the DOCTYPE name.
8413 */
8414 name = xmlParseName(ctxt);
8415 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008416 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8417 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008418 }
8419 ctxt->intSubName = name;
8420
8421 SKIP_BLANKS;
8422
8423 /*
8424 * Check for SystemID and ExternalID
8425 */
8426 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8427
8428 if ((URI != NULL) || (ExternalID != NULL)) {
8429 ctxt->hasExternalSubset = 1;
8430 }
8431 ctxt->extSubURI = URI;
8432 ctxt->extSubSystem = ExternalID;
8433
8434 SKIP_BLANKS;
8435
8436 /*
8437 * Create and update the internal subset.
8438 */
8439 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8440 (!ctxt->disableSAX))
8441 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008442 if (ctxt->instate == XML_PARSER_EOF)
8443 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008444
8445 /*
8446 * Is there any internal subset declarations ?
8447 * they are handled separately in xmlParseInternalSubset()
8448 */
8449 if (RAW == '[')
8450 return;
8451
8452 /*
8453 * We should be at the end of the DOCTYPE declaration.
8454 */
8455 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008456 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008457 }
8458 NEXT;
8459}
8460
8461/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008462 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008463 * @ctxt: an XML parser context
8464 *
8465 * parse the internal subset declaration
8466 *
8467 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8468 */
8469
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008470static void
Owen Taylor3473f882001-02-23 17:55:21 +00008471xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8472 /*
8473 * Is there any DTD definition ?
8474 */
8475 if (RAW == '[') {
8476 ctxt->instate = XML_PARSER_DTD;
8477 NEXT;
8478 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008479 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008480 * PEReferences.
8481 * Subsequence (markupdecl | PEReference | S)*
8482 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008483 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008484 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008485 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008486
8487 SKIP_BLANKS;
8488 xmlParseMarkupDecl(ctxt);
8489 xmlParsePEReference(ctxt);
8490
8491 /*
8492 * Pop-up of finished entities.
8493 */
8494 while ((RAW == 0) && (ctxt->inputNr > 1))
8495 xmlPopInput(ctxt);
8496
8497 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008498 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008499 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008500 break;
8501 }
8502 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008503 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008504 NEXT;
8505 SKIP_BLANKS;
8506 }
8507 }
8508
8509 /*
8510 * We should be at the end of the DOCTYPE declaration.
8511 */
8512 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008513 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008514 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008515 }
8516 NEXT;
8517}
8518
Daniel Veillard81273902003-09-30 00:43:48 +00008519#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008520/**
8521 * xmlParseAttribute:
8522 * @ctxt: an XML parser context
8523 * @value: a xmlChar ** used to store the value of the attribute
8524 *
8525 * parse an attribute
8526 *
8527 * [41] Attribute ::= Name Eq AttValue
8528 *
8529 * [ WFC: No External Entity References ]
8530 * Attribute values cannot contain direct or indirect entity references
8531 * to external entities.
8532 *
8533 * [ WFC: No < in Attribute Values ]
8534 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008535 * an attribute value (other than "&lt;") must not contain a <.
8536 *
Owen Taylor3473f882001-02-23 17:55:21 +00008537 * [ VC: Attribute Value Type ]
8538 * The attribute must have been declared; the value must be of the type
8539 * declared for it.
8540 *
8541 * [25] Eq ::= S? '=' S?
8542 *
8543 * With namespace:
8544 *
8545 * [NS 11] Attribute ::= QName Eq AttValue
8546 *
8547 * Also the case QName == xmlns:??? is handled independently as a namespace
8548 * definition.
8549 *
8550 * Returns the attribute name, and the value in *value.
8551 */
8552
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008553const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008554xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008555 const xmlChar *name;
8556 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008557
8558 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008559 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008560 name = xmlParseName(ctxt);
8561 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008562 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008563 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008564 return(NULL);
8565 }
8566
8567 /*
8568 * read the value
8569 */
8570 SKIP_BLANKS;
8571 if (RAW == '=') {
8572 NEXT;
8573 SKIP_BLANKS;
8574 val = xmlParseAttValue(ctxt);
8575 ctxt->instate = XML_PARSER_CONTENT;
8576 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008577 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008578 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008579 return(NULL);
8580 }
8581
8582 /*
8583 * Check that xml:lang conforms to the specification
8584 * No more registered as an error, just generate a warning now
8585 * since this was deprecated in XML second edition
8586 */
8587 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8588 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008589 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8590 "Malformed value for xml:lang : %s\n",
8591 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008592 }
8593 }
8594
8595 /*
8596 * Check that xml:space conforms to the specification
8597 */
8598 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8599 if (xmlStrEqual(val, BAD_CAST "default"))
8600 *(ctxt->space) = 0;
8601 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8602 *(ctxt->space) = 1;
8603 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008604 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008605"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008606 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008607 }
8608 }
8609
8610 *value = val;
8611 return(name);
8612}
8613
8614/**
8615 * xmlParseStartTag:
8616 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008617 *
Owen Taylor3473f882001-02-23 17:55:21 +00008618 * parse a start of tag either for rule element or
8619 * EmptyElement. In both case we don't parse the tag closing chars.
8620 *
8621 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8622 *
8623 * [ WFC: Unique Att Spec ]
8624 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008625 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008626 *
8627 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8628 *
8629 * [ WFC: Unique Att Spec ]
8630 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008631 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008632 *
8633 * With namespace:
8634 *
8635 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8636 *
8637 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8638 *
8639 * Returns the element name parsed
8640 */
8641
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008642const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008643xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008644 const xmlChar *name;
8645 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008646 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008647 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008648 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008649 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008650 int i;
8651
8652 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008653 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008654
8655 name = xmlParseName(ctxt);
8656 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008657 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008658 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008659 return(NULL);
8660 }
8661
8662 /*
8663 * Now parse the attributes, it ends up with the ending
8664 *
8665 * (S Attribute)* S?
8666 */
8667 SKIP_BLANKS;
8668 GROW;
8669
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008670 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008671 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008672 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008673 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008674 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008675
8676 attname = xmlParseAttribute(ctxt, &attvalue);
8677 if ((attname != NULL) && (attvalue != NULL)) {
8678 /*
8679 * [ WFC: Unique Att Spec ]
8680 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008681 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008682 */
8683 for (i = 0; i < nbatts;i += 2) {
8684 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008685 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008686 xmlFree(attvalue);
8687 goto failed;
8688 }
8689 }
Owen Taylor3473f882001-02-23 17:55:21 +00008690 /*
8691 * Add the pair to atts
8692 */
8693 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008694 maxatts = 22; /* allow for 10 attrs by default */
8695 atts = (const xmlChar **)
8696 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008697 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008698 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008699 if (attvalue != NULL)
8700 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008701 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008702 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008703 ctxt->atts = atts;
8704 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008705 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008706 const xmlChar **n;
8707
Owen Taylor3473f882001-02-23 17:55:21 +00008708 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008709 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008710 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008711 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008712 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008713 if (attvalue != NULL)
8714 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008715 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008716 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008717 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008718 ctxt->atts = atts;
8719 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008720 }
8721 atts[nbatts++] = attname;
8722 atts[nbatts++] = attvalue;
8723 atts[nbatts] = NULL;
8724 atts[nbatts + 1] = NULL;
8725 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008726 if (attvalue != NULL)
8727 xmlFree(attvalue);
8728 }
8729
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008730failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008731
Daniel Veillard3772de32002-12-17 10:31:45 +00008732 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008733 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8734 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008735 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008736 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8737 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008738 }
8739 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008740 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8741 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008742 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8743 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008744 break;
8745 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008746 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008747 GROW;
8748 }
8749
8750 /*
8751 * SAX: Start of Element !
8752 */
8753 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008754 (!ctxt->disableSAX)) {
8755 if (nbatts > 0)
8756 ctxt->sax->startElement(ctxt->userData, name, atts);
8757 else
8758 ctxt->sax->startElement(ctxt->userData, name, NULL);
8759 }
Owen Taylor3473f882001-02-23 17:55:21 +00008760
8761 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008762 /* Free only the content strings */
8763 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008764 if (atts[i] != NULL)
8765 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008766 }
8767 return(name);
8768}
8769
8770/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008772 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008773 * @line: line of the start tag
8774 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008775 *
8776 * parse an end of tag
8777 *
8778 * [42] ETag ::= '</' Name S? '>'
8779 *
8780 * With namespace
8781 *
8782 * [NS 9] ETag ::= '</' QName S? '>'
8783 */
8784
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008785static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008786xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008787 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008788
8789 GROW;
8790 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008791 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008792 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008793 return;
8794 }
8795 SKIP(2);
8796
Daniel Veillard46de64e2002-05-29 08:21:33 +00008797 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008798
8799 /*
8800 * We should definitely be at the ending "S? '>'" part
8801 */
8802 GROW;
8803 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008804 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008805 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008806 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008807 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008808
8809 /*
8810 * [ WFC: Element Type Match ]
8811 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008812 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008813 *
8814 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008815 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008816 if (name == NULL) name = BAD_CAST "unparseable";
8817 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008818 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008819 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008820 }
8821
8822 /*
8823 * SAX: End of Tag
8824 */
8825 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8826 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008827 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008828
Daniel Veillarde57ec792003-09-10 10:50:59 +00008829 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008830 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008831 return;
8832}
8833
8834/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008835 * xmlParseEndTag:
8836 * @ctxt: an XML parser context
8837 *
8838 * parse an end of tag
8839 *
8840 * [42] ETag ::= '</' Name S? '>'
8841 *
8842 * With namespace
8843 *
8844 * [NS 9] ETag ::= '</' QName S? '>'
8845 */
8846
8847void
8848xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 xmlParseEndTag1(ctxt, 0);
8850}
Daniel Veillard81273902003-09-30 00:43:48 +00008851#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008852
8853/************************************************************************
8854 * *
8855 * SAX 2 specific operations *
8856 * *
8857 ************************************************************************/
8858
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859/*
8860 * xmlGetNamespace:
8861 * @ctxt: an XML parser context
8862 * @prefix: the prefix to lookup
8863 *
8864 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008865 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008866 *
8867 * Returns the namespace name or NULL if not bound
8868 */
8869static const xmlChar *
8870xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8871 int i;
8872
Daniel Veillarde57ec792003-09-10 10:50:59 +00008873 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008874 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008875 if (ctxt->nsTab[i] == prefix) {
8876 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8877 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008878 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008879 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008880 return(NULL);
8881}
8882
8883/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008884 * xmlParseQName:
8885 * @ctxt: an XML parser context
8886 * @prefix: pointer to store the prefix part
8887 *
8888 * parse an XML Namespace QName
8889 *
8890 * [6] QName ::= (Prefix ':')? LocalPart
8891 * [7] Prefix ::= NCName
8892 * [8] LocalPart ::= NCName
8893 *
8894 * Returns the Name parsed or NULL
8895 */
8896
8897static const xmlChar *
8898xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8899 const xmlChar *l, *p;
8900
8901 GROW;
8902
8903 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008904 if (l == NULL) {
8905 if (CUR == ':') {
8906 l = xmlParseName(ctxt);
8907 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008908 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008909 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008910 *prefix = NULL;
8911 return(l);
8912 }
8913 }
8914 return(NULL);
8915 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008916 if (CUR == ':') {
8917 NEXT;
8918 p = l;
8919 l = xmlParseNCName(ctxt);
8920 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008921 xmlChar *tmp;
8922
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008923 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8924 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008925 l = xmlParseNmtoken(ctxt);
8926 if (l == NULL)
8927 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8928 else {
8929 tmp = xmlBuildQName(l, p, NULL, 0);
8930 xmlFree((char *)l);
8931 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008932 p = xmlDictLookup(ctxt->dict, tmp, -1);
8933 if (tmp != NULL) xmlFree(tmp);
8934 *prefix = NULL;
8935 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008936 }
8937 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008938 xmlChar *tmp;
8939
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008940 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8941 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008942 NEXT;
8943 tmp = (xmlChar *) xmlParseName(ctxt);
8944 if (tmp != NULL) {
8945 tmp = xmlBuildQName(tmp, l, NULL, 0);
8946 l = xmlDictLookup(ctxt->dict, tmp, -1);
8947 if (tmp != NULL) xmlFree(tmp);
8948 *prefix = p;
8949 return(l);
8950 }
8951 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8952 l = xmlDictLookup(ctxt->dict, tmp, -1);
8953 if (tmp != NULL) xmlFree(tmp);
8954 *prefix = p;
8955 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008956 }
8957 *prefix = p;
8958 } else
8959 *prefix = NULL;
8960 return(l);
8961}
8962
8963/**
8964 * xmlParseQNameAndCompare:
8965 * @ctxt: an XML parser context
8966 * @name: the localname
8967 * @prefix: the prefix, if any.
8968 *
8969 * parse an XML name and compares for match
8970 * (specialized for endtag parsing)
8971 *
8972 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8973 * and the name for mismatch
8974 */
8975
8976static const xmlChar *
8977xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8978 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008979 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008980 const xmlChar *in;
8981 const xmlChar *ret;
8982 const xmlChar *prefix2;
8983
8984 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8985
8986 GROW;
8987 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008988
Daniel Veillard0fb18932003-09-07 09:14:37 +00008989 cmp = prefix;
8990 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008991 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008992 ++cmp;
8993 }
8994 if ((*cmp == 0) && (*in == ':')) {
8995 in++;
8996 cmp = name;
8997 while (*in != 0 && *in == *cmp) {
8998 ++in;
8999 ++cmp;
9000 }
William M. Brack76e95df2003-10-18 16:20:14 +00009001 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009002 /* success */
9003 ctxt->input->cur = in;
9004 return((const xmlChar*) 1);
9005 }
9006 }
9007 /*
9008 * all strings coms from the dictionary, equality can be done directly
9009 */
9010 ret = xmlParseQName (ctxt, &prefix2);
9011 if ((ret == name) && (prefix == prefix2))
9012 return((const xmlChar*) 1);
9013 return ret;
9014}
9015
9016/**
9017 * xmlParseAttValueInternal:
9018 * @ctxt: an XML parser context
9019 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009020 * @alloc: whether the attribute was reallocated as a new string
9021 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00009022 *
9023 * parse a value for an attribute.
9024 * NOTE: if no normalization is needed, the routine will return pointers
9025 * directly from the data buffer.
9026 *
9027 * 3.3.3 Attribute-Value Normalization:
9028 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009029 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009030 * - a character reference is processed by appending the referenced
9031 * character to the attribute value
9032 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009033 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00009034 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9035 * appending #x20 to the normalized value, except that only a single
9036 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009037 * parsed entity or the literal entity value of an internal parsed entity
9038 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00009039 * If the declared value is not CDATA, then the XML processor must further
9040 * process the normalized attribute value by discarding any leading and
9041 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009042 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009043 * All attributes for which no declaration has been read should be treated
9044 * by a non-validating parser as if declared CDATA.
9045 *
9046 * Returns the AttValue parsed or NULL. The value has to be freed by the
9047 * caller if it was copied, this can be detected by val[*len] == 0.
9048 */
9049
9050static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009051xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9052 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009053{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009054 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009055 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009056 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08009057 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009058
9059 GROW;
9060 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08009061 line = ctxt->input->line;
9062 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009063 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009064 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009065 return (NULL);
9066 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009067 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009068
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009069 /*
9070 * try to handle in this routine the most common case where no
9071 * allocation of a new string is required and where content is
9072 * pure ASCII.
9073 */
9074 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009075 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009076 end = ctxt->input->end;
9077 start = in;
9078 if (in >= end) {
9079 const xmlChar *oldbase = ctxt->input->base;
9080 GROW;
9081 if (oldbase != ctxt->input->base) {
9082 long delta = ctxt->input->base - oldbase;
9083 start = start + delta;
9084 in = in + delta;
9085 }
9086 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009087 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009088 if (normalize) {
9089 /*
9090 * Skip any leading spaces
9091 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009092 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009093 ((*in == 0x20) || (*in == 0x9) ||
9094 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009095 if (*in == 0xA) {
9096 line++; col = 1;
9097 } else {
9098 col++;
9099 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009100 in++;
9101 start = in;
9102 if (in >= end) {
9103 const xmlChar *oldbase = ctxt->input->base;
9104 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009105 if (ctxt->instate == XML_PARSER_EOF)
9106 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009107 if (oldbase != ctxt->input->base) {
9108 long delta = ctxt->input->base - oldbase;
9109 start = start + delta;
9110 in = in + delta;
9111 }
9112 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009113 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9114 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9115 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009116 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009117 return(NULL);
9118 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009119 }
9120 }
9121 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9122 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009123 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009124 if ((*in++ == 0x20) && (*in == 0x20)) break;
9125 if (in >= end) {
9126 const xmlChar *oldbase = ctxt->input->base;
9127 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009128 if (ctxt->instate == XML_PARSER_EOF)
9129 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009130 if (oldbase != ctxt->input->base) {
9131 long delta = ctxt->input->base - oldbase;
9132 start = start + delta;
9133 in = in + delta;
9134 }
9135 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009136 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9137 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9138 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009139 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009140 return(NULL);
9141 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009142 }
9143 }
9144 last = in;
9145 /*
9146 * skip the trailing blanks
9147 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009148 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009149 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009150 ((*in == 0x20) || (*in == 0x9) ||
9151 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009152 if (*in == 0xA) {
9153 line++, col = 1;
9154 } else {
9155 col++;
9156 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009157 in++;
9158 if (in >= end) {
9159 const xmlChar *oldbase = ctxt->input->base;
9160 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009161 if (ctxt->instate == XML_PARSER_EOF)
9162 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009163 if (oldbase != ctxt->input->base) {
9164 long delta = ctxt->input->base - oldbase;
9165 start = start + delta;
9166 in = in + delta;
9167 last = last + delta;
9168 }
9169 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009170 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9171 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9172 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009173 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009174 return(NULL);
9175 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009176 }
9177 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009178 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9179 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9180 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009181 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009182 return(NULL);
9183 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009184 if (*in != limit) goto need_complex;
9185 } else {
9186 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9187 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9188 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009189 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009190 if (in >= end) {
9191 const xmlChar *oldbase = ctxt->input->base;
9192 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009193 if (ctxt->instate == XML_PARSER_EOF)
9194 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009195 if (oldbase != ctxt->input->base) {
9196 long delta = ctxt->input->base - oldbase;
9197 start = start + delta;
9198 in = in + delta;
9199 }
9200 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009201 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9202 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9203 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009204 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009205 return(NULL);
9206 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009207 }
9208 }
9209 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009210 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9211 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9212 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009213 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009214 return(NULL);
9215 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009216 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009217 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009218 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009219 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009221 *len = last - start;
9222 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009223 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009224 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009225 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009226 }
9227 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009228 ctxt->input->line = line;
9229 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009230 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009231 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009232need_complex:
9233 if (alloc) *alloc = 1;
9234 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009235}
9236
9237/**
9238 * xmlParseAttribute2:
9239 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009240 * @pref: the element prefix
9241 * @elem: the element name
9242 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009243 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009244 * @len: an int * to save the length of the attribute
9245 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009246 *
9247 * parse an attribute in the new SAX2 framework.
9248 *
9249 * Returns the attribute name, and the value in *value, .
9250 */
9251
9252static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009253xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009254 const xmlChar * pref, const xmlChar * elem,
9255 const xmlChar ** prefix, xmlChar ** value,
9256 int *len, int *alloc)
9257{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009258 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009259 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009260 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009261
9262 *value = NULL;
9263 GROW;
9264 name = xmlParseQName(ctxt, prefix);
9265 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009266 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9267 "error parsing attribute name\n");
9268 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009269 }
9270
9271 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009272 * get the type if needed
9273 */
9274 if (ctxt->attsSpecial != NULL) {
9275 int type;
9276
9277 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009278 pref, elem, *prefix, name);
9279 if (type != 0)
9280 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009281 }
9282
9283 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009284 * read the value
9285 */
9286 SKIP_BLANKS;
9287 if (RAW == '=') {
9288 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009289 SKIP_BLANKS;
9290 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9291 if (normalize) {
9292 /*
9293 * Sometimes a second normalisation pass for spaces is needed
9294 * but that only happens if charrefs or entities refernces
9295 * have been used in the attribute value, i.e. the attribute
9296 * value have been extracted in an allocated string already.
9297 */
9298 if (*alloc) {
9299 const xmlChar *val2;
9300
9301 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009302 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009303 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009304 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009305 }
9306 }
9307 }
9308 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009309 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009310 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9311 "Specification mandate value for attribute %s\n",
9312 name);
9313 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009314 }
9315
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009316 if (*prefix == ctxt->str_xml) {
9317 /*
9318 * Check that xml:lang conforms to the specification
9319 * No more registered as an error, just generate a warning now
9320 * since this was deprecated in XML second edition
9321 */
9322 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9323 internal_val = xmlStrndup(val, *len);
9324 if (!xmlCheckLanguageID(internal_val)) {
9325 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9326 "Malformed value for xml:lang : %s\n",
9327 internal_val, NULL);
9328 }
9329 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009330
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009331 /*
9332 * Check that xml:space conforms to the specification
9333 */
9334 if (xmlStrEqual(name, BAD_CAST "space")) {
9335 internal_val = xmlStrndup(val, *len);
9336 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9337 *(ctxt->space) = 0;
9338 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9339 *(ctxt->space) = 1;
9340 else {
9341 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9342 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9343 internal_val, NULL);
9344 }
9345 }
9346 if (internal_val) {
9347 xmlFree(internal_val);
9348 }
9349 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009350
9351 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009352 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009353}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009354/**
9355 * xmlParseStartTag2:
9356 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009357 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009358 * parse a start of tag either for rule element or
9359 * EmptyElement. In both case we don't parse the tag closing chars.
9360 * This routine is called when running SAX2 parsing
9361 *
9362 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9363 *
9364 * [ WFC: Unique Att Spec ]
9365 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009366 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009367 *
9368 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9369 *
9370 * [ WFC: Unique Att Spec ]
9371 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009372 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009373 *
9374 * With namespace:
9375 *
9376 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9377 *
9378 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9379 *
9380 * Returns the element name parsed
9381 */
9382
9383static const xmlChar *
9384xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009385 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009386 const xmlChar *localname;
9387 const xmlChar *prefix;
9388 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009389 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009390 const xmlChar *nsname;
9391 xmlChar *attvalue;
9392 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009393 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009394 int nratts, nbatts, nbdef;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009395 int i, j, nbNs, attval, oldline, oldcol, inputNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009396 const xmlChar *base;
9397 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009398 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009399
9400 if (RAW != '<') return(NULL);
9401 NEXT1;
9402
9403 /*
9404 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9405 * point since the attribute values may be stored as pointers to
9406 * the buffer and calling SHRINK would destroy them !
9407 * The Shrinking is only possible once the full set of attribute
9408 * callbacks have been done.
9409 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009410reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009411 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009412 base = ctxt->input->base;
9413 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009414 inputNr = ctxt->inputNr;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009415 oldline = ctxt->input->line;
9416 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009417 nbatts = 0;
9418 nratts = 0;
9419 nbdef = 0;
9420 nbNs = 0;
9421 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009422 /* Forget any namespaces added during an earlier parse of this element. */
9423 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009424
9425 localname = xmlParseQName(ctxt, &prefix);
9426 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009427 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9428 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009429 return(NULL);
9430 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009431 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009432
9433 /*
9434 * Now parse the attributes, it ends up with the ending
9435 *
9436 * (S Attribute)* S?
9437 */
9438 SKIP_BLANKS;
9439 GROW;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009440 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9441 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009442
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009443 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009444 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009445 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009446 const xmlChar *q = CUR_PTR;
9447 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009448 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009449
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009450 attname = xmlParseAttribute2(ctxt, prefix, localname,
9451 &aprefix, &attvalue, &len, &alloc);
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009452 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
Daniel Veillarddcec6722006-10-15 20:32:53 +00009453 if ((attvalue != NULL) && (alloc != 0))
9454 xmlFree(attvalue);
9455 attvalue = NULL;
9456 goto base_changed;
9457 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009458 if ((attname != NULL) && (attvalue != NULL)) {
9459 if (len < 0) len = xmlStrlen(attvalue);
9460 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009461 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9462 xmlURIPtr uri;
9463
Daniel Veillardc836ba62014-07-14 16:39:50 +08009464 if (URL == NULL) {
9465 xmlErrMemory(ctxt, "dictionary allocation failure");
9466 if ((attvalue != NULL) && (alloc != 0))
9467 xmlFree(attvalue);
9468 return(NULL);
9469 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009470 if (*URL != 0) {
9471 uri = xmlParseURI((const char *) URL);
9472 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009473 xmlNsErr(ctxt, XML_WAR_NS_URI,
9474 "xmlns: '%s' is not a valid URI\n",
9475 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009476 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009477 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009478 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9479 "xmlns: URI %s is not absolute\n",
9480 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009481 }
9482 xmlFreeURI(uri);
9483 }
Daniel Veillard37334572008-07-31 08:20:02 +00009484 if (URL == ctxt->str_xml_ns) {
9485 if (attname != ctxt->str_xml) {
9486 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9487 "xml namespace URI cannot be the default namespace\n",
9488 NULL, NULL, NULL);
9489 }
9490 goto skip_default_ns;
9491 }
9492 if ((len == 29) &&
9493 (xmlStrEqual(URL,
9494 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9495 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9496 "reuse of the xmlns namespace name is forbidden\n",
9497 NULL, NULL, NULL);
9498 goto skip_default_ns;
9499 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009500 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009501 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009502 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009503 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009504 for (j = 1;j <= nbNs;j++)
9505 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9506 break;
9507 if (j <= nbNs)
9508 xmlErrAttributeDup(ctxt, NULL, attname);
9509 else
9510 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009511skip_default_ns:
Pranjal Jumde38eae572016-03-07 14:04:08 -08009512 if ((attvalue != NULL) && (alloc != 0)) {
9513 xmlFree(attvalue);
9514 attvalue = NULL;
9515 }
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009516 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9517 break;
9518 if (!IS_BLANK_CH(RAW)) {
9519 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9520 "attributes construct error\n");
9521 break;
9522 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009523 SKIP_BLANKS;
Pranjal Jumde38eae572016-03-07 14:04:08 -08009524 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9525 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009526 continue;
9527 }
9528 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009529 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9530 xmlURIPtr uri;
9531
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009532 if (attname == ctxt->str_xml) {
9533 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009534 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9535 "xml namespace prefix mapped to wrong URI\n",
9536 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009537 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009538 /*
9539 * Do not keep a namespace definition node
9540 */
Daniel Veillard37334572008-07-31 08:20:02 +00009541 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009542 }
Daniel Veillard37334572008-07-31 08:20:02 +00009543 if (URL == ctxt->str_xml_ns) {
9544 if (attname != ctxt->str_xml) {
9545 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9546 "xml namespace URI mapped to wrong prefix\n",
9547 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009548 }
Daniel Veillard37334572008-07-31 08:20:02 +00009549 goto skip_ns;
9550 }
9551 if (attname == ctxt->str_xmlns) {
9552 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9553 "redefinition of the xmlns prefix is forbidden\n",
9554 NULL, NULL, NULL);
9555 goto skip_ns;
9556 }
9557 if ((len == 29) &&
9558 (xmlStrEqual(URL,
9559 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9560 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9561 "reuse of the xmlns namespace name is forbidden\n",
9562 NULL, NULL, NULL);
9563 goto skip_ns;
9564 }
9565 if ((URL == NULL) || (URL[0] == 0)) {
9566 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9567 "xmlns:%s: Empty XML namespace is not allowed\n",
9568 attname, NULL, NULL);
9569 goto skip_ns;
9570 } else {
9571 uri = xmlParseURI((const char *) URL);
9572 if (uri == NULL) {
9573 xmlNsErr(ctxt, XML_WAR_NS_URI,
9574 "xmlns:%s: '%s' is not a valid URI\n",
9575 attname, URL, NULL);
9576 } else {
9577 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9578 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9579 "xmlns:%s: URI %s is not absolute\n",
9580 attname, URL, NULL);
9581 }
9582 xmlFreeURI(uri);
9583 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009584 }
9585
Daniel Veillard0fb18932003-09-07 09:14:37 +00009586 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009587 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009588 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009589 for (j = 1;j <= nbNs;j++)
9590 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9591 break;
9592 if (j <= nbNs)
9593 xmlErrAttributeDup(ctxt, aprefix, attname);
9594 else
9595 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009596skip_ns:
Pranjal Jumde38eae572016-03-07 14:04:08 -08009597 if ((attvalue != NULL) && (alloc != 0)) {
9598 xmlFree(attvalue);
9599 attvalue = NULL;
9600 }
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009601 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9602 break;
9603 if (!IS_BLANK_CH(RAW)) {
9604 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9605 "attributes construct error\n");
9606 break;
9607 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009608 SKIP_BLANKS;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009609 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9610 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009611 continue;
9612 }
9613
9614 /*
9615 * Add the pair to atts
9616 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009617 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9618 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009619 if (attvalue[len] == 0)
9620 xmlFree(attvalue);
9621 goto failed;
9622 }
9623 maxatts = ctxt->maxatts;
9624 atts = ctxt->atts;
9625 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009626 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009627 atts[nbatts++] = attname;
9628 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009629 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009630 atts[nbatts++] = attvalue;
9631 attvalue += len;
9632 atts[nbatts++] = attvalue;
9633 /*
9634 * tag if some deallocation is needed
9635 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009636 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009637 } else {
9638 if ((attvalue != NULL) && (attvalue[len] == 0))
9639 xmlFree(attvalue);
9640 }
9641
Daniel Veillard37334572008-07-31 08:20:02 +00009642failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009643
9644 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009645 if (ctxt->instate == XML_PARSER_EOF)
9646 break;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009647 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9648 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009649 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9650 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009651 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009652 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9653 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009654 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009655 }
9656 SKIP_BLANKS;
9657 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9658 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009659 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009660 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009661 break;
9662 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009663 GROW;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009664 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9665 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009666 }
9667
Daniel Veillard0fb18932003-09-07 09:14:37 +00009668 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009669 * The attributes defaulting
9670 */
9671 if (ctxt->attsDefault != NULL) {
9672 xmlDefAttrsPtr defaults;
9673
9674 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9675 if (defaults != NULL) {
9676 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009677 attname = defaults->values[5 * i];
9678 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009679
9680 /*
9681 * special work for namespaces defaulted defs
9682 */
9683 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9684 /*
9685 * check that it's not a defined namespace
9686 */
9687 for (j = 1;j <= nbNs;j++)
9688 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9689 break;
9690 if (j <= nbNs) continue;
9691
9692 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009693 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009694 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009695 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009696 nbNs++;
9697 }
9698 } else if (aprefix == ctxt->str_xmlns) {
9699 /*
9700 * check that it's not a defined namespace
9701 */
9702 for (j = 1;j <= nbNs;j++)
9703 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9704 break;
9705 if (j <= nbNs) continue;
9706
9707 nsname = xmlGetNamespace(ctxt, attname);
9708 if (nsname != defaults->values[2]) {
9709 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009710 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009711 nbNs++;
9712 }
9713 } else {
9714 /*
9715 * check that it's not a defined attribute
9716 */
9717 for (j = 0;j < nbatts;j+=5) {
9718 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9719 break;
9720 }
9721 if (j < nbatts) continue;
9722
9723 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9724 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009725 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009726 }
9727 maxatts = ctxt->maxatts;
9728 atts = ctxt->atts;
9729 }
9730 atts[nbatts++] = attname;
9731 atts[nbatts++] = aprefix;
9732 if (aprefix == NULL)
9733 atts[nbatts++] = NULL;
9734 else
9735 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009736 atts[nbatts++] = defaults->values[5 * i + 2];
9737 atts[nbatts++] = defaults->values[5 * i + 3];
9738 if ((ctxt->standalone == 1) &&
9739 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009740 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009741 "standalone: attribute %s on %s defaulted from external subset\n",
9742 attname, localname);
9743 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009744 nbdef++;
9745 }
9746 }
9747 }
9748 }
9749
Daniel Veillarde70c8772003-11-25 07:21:18 +00009750 /*
9751 * The attributes checkings
9752 */
9753 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009754 /*
9755 * The default namespace does not apply to attribute names.
9756 */
9757 if (atts[i + 1] != NULL) {
9758 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9759 if (nsname == NULL) {
9760 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9761 "Namespace prefix %s for %s on %s is not defined\n",
9762 atts[i + 1], atts[i], localname);
9763 }
9764 atts[i + 2] = nsname;
9765 } else
9766 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009767 /*
9768 * [ WFC: Unique Att Spec ]
9769 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009770 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009771 * As extended by the Namespace in XML REC.
9772 */
9773 for (j = 0; j < i;j += 5) {
9774 if (atts[i] == atts[j]) {
9775 if (atts[i+1] == atts[j+1]) {
9776 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9777 break;
9778 }
9779 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9780 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9781 "Namespaced Attribute %s in '%s' redefined\n",
9782 atts[i], nsname, NULL);
9783 break;
9784 }
9785 }
9786 }
9787 }
9788
Daniel Veillarde57ec792003-09-10 10:50:59 +00009789 nsname = xmlGetNamespace(ctxt, prefix);
9790 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009791 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9792 "Namespace prefix %s on %s is not defined\n",
9793 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009794 }
9795 *pref = prefix;
9796 *URI = nsname;
9797
9798 /*
9799 * SAX: Start of Element !
9800 */
9801 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9802 (!ctxt->disableSAX)) {
9803 if (nbNs > 0)
9804 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9805 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9806 nbatts / 5, nbdef, atts);
9807 else
9808 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9809 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9810 }
9811
9812 /*
9813 * Free up attribute allocated strings if needed
9814 */
9815 if (attval != 0) {
9816 for (i = 3,j = 0; j < nratts;i += 5,j++)
9817 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9818 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009819 }
9820
9821 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009822
9823base_changed:
9824 /*
9825 * the attribute strings are valid iif the base didn't changed
9826 */
9827 if (attval != 0) {
9828 for (i = 3,j = 0; j < nratts;i += 5,j++)
9829 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9830 xmlFree((xmlChar *) atts[i]);
9831 }
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009832
9833 /*
9834 * We can't switch from one entity to another in the middle
9835 * of a start tag
9836 */
9837 if (inputNr != ctxt->inputNr) {
9838 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9839 "Start tag doesn't start and stop in the same entity\n");
9840 return(NULL);
9841 }
9842
Daniel Veillarde57ec792003-09-10 10:50:59 +00009843 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009844 ctxt->input->line = oldline;
9845 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009846 if (ctxt->wellFormed == 1) {
9847 goto reparse;
9848 }
9849 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009850}
9851
9852/**
9853 * xmlParseEndTag2:
9854 * @ctxt: an XML parser context
9855 * @line: line of the start tag
9856 * @nsNr: number of namespaces on the start tag
9857 *
9858 * parse an end of tag
9859 *
9860 * [42] ETag ::= '</' Name S? '>'
9861 *
9862 * With namespace
9863 *
9864 * [NS 9] ETag ::= '</' QName S? '>'
9865 */
9866
9867static void
9868xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009869 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009870 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009871 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009872
9873 GROW;
9874 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009875 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009876 return;
9877 }
9878 SKIP(2);
9879
David Kilzerdb07dd62016-02-12 09:58:29 -08009880 curLength = ctxt->input->end - ctxt->input->cur;
9881 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9882 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9883 if ((curLength >= (size_t)(tlen + 1)) &&
9884 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009885 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009886 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009887 goto done;
9888 }
9889 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009890 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009891 name = (xmlChar*)1;
9892 } else {
9893 if (prefix == NULL)
9894 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9895 else
9896 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9897 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009898
9899 /*
9900 * We should definitely be at the ending "S? '>'" part
9901 */
9902 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009903 if (ctxt->instate == XML_PARSER_EOF)
9904 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009905 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009906 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009907 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009908 } else
9909 NEXT1;
9910
9911 /*
9912 * [ WFC: Element Type Match ]
9913 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009914 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009915 *
9916 */
9917 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009918 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009919 if ((line == 0) && (ctxt->node != NULL))
9920 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009921 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009922 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009923 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009924 }
9925
9926 /*
9927 * SAX: End of Tag
9928 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009929done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009930 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9931 (!ctxt->disableSAX))
9932 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9933
Daniel Veillard0fb18932003-09-07 09:14:37 +00009934 spacePop(ctxt);
9935 if (nsNr != 0)
9936 nsPop(ctxt, nsNr);
9937 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009938}
9939
9940/**
Owen Taylor3473f882001-02-23 17:55:21 +00009941 * xmlParseCDSect:
9942 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009943 *
Owen Taylor3473f882001-02-23 17:55:21 +00009944 * Parse escaped pure raw content.
9945 *
9946 * [18] CDSect ::= CDStart CData CDEnd
9947 *
9948 * [19] CDStart ::= '<![CDATA['
9949 *
9950 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9951 *
9952 * [21] CDEnd ::= ']]>'
9953 */
9954void
9955xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9956 xmlChar *buf = NULL;
9957 int len = 0;
9958 int size = XML_PARSER_BUFFER_SIZE;
9959 int r, rl;
9960 int s, sl;
9961 int cur, l;
9962 int count = 0;
9963
Daniel Veillard8f597c32003-10-06 08:19:27 +00009964 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009965 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009966 SKIP(9);
9967 } else
9968 return;
9969
9970 ctxt->instate = XML_PARSER_CDATA_SECTION;
9971 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009972 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009973 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009974 ctxt->instate = XML_PARSER_CONTENT;
9975 return;
9976 }
9977 NEXTL(rl);
9978 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009979 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009980 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009981 ctxt->instate = XML_PARSER_CONTENT;
9982 return;
9983 }
9984 NEXTL(sl);
9985 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009986 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009987 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009988 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009989 return;
9990 }
William M. Brack871611b2003-10-18 04:53:14 +00009991 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009992 ((r != ']') || (s != ']') || (cur != '>'))) {
9993 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009994 xmlChar *tmp;
9995
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009996 if ((size > XML_MAX_TEXT_LENGTH) &&
9997 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9998 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9999 "CData section too big found", NULL);
10000 xmlFree (buf);
10001 return;
10002 }
10003 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +000010004 if (tmp == NULL) {
10005 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010006 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010007 return;
10008 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010009 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080010010 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +000010011 }
10012 COPY_BUF(rl,buf,len,r);
10013 r = s;
10014 rl = sl;
10015 s = cur;
10016 sl = l;
10017 count++;
10018 if (count > 50) {
10019 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010020 if (ctxt->instate == XML_PARSER_EOF) {
10021 xmlFree(buf);
10022 return;
10023 }
Owen Taylor3473f882001-02-23 17:55:21 +000010024 count = 0;
10025 }
10026 NEXTL(l);
10027 cur = CUR_CHAR(l);
10028 }
10029 buf[len] = 0;
10030 ctxt->instate = XML_PARSER_CONTENT;
10031 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010032 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +000010033 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010034 xmlFree(buf);
10035 return;
10036 }
10037 NEXTL(l);
10038
10039 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010040 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +000010041 */
10042 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10043 if (ctxt->sax->cdataBlock != NULL)
10044 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +000010045 else if (ctxt->sax->characters != NULL)
10046 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +000010047 }
10048 xmlFree(buf);
10049}
10050
10051/**
10052 * xmlParseContent:
10053 * @ctxt: an XML parser context
10054 *
10055 * Parse a content:
10056 *
10057 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10058 */
10059
10060void
10061xmlParseContent(xmlParserCtxtPtr ctxt) {
10062 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +000010063 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010064 ((RAW != '<') || (NXT(1) != '/')) &&
10065 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010066 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +000010067 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +000010068 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010069
10070 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010071 * First case : a Processing Instruction.
10072 */
Daniel Veillardfdc91562002-07-01 21:52:03 +000010073 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010074 xmlParsePI(ctxt);
10075 }
10076
10077 /*
10078 * Second case : a CDSection
10079 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010080 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010081 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010082 xmlParseCDSect(ctxt);
10083 }
10084
10085 /*
10086 * Third case : a comment
10087 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010088 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010089 (NXT(2) == '-') && (NXT(3) == '-')) {
10090 xmlParseComment(ctxt);
10091 ctxt->instate = XML_PARSER_CONTENT;
10092 }
10093
10094 /*
10095 * Fourth case : a sub-element.
10096 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010097 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +000010098 xmlParseElement(ctxt);
10099 }
10100
10101 /*
10102 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010103 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +000010104 */
10105
Daniel Veillard21a0f912001-02-25 19:54:14 +000010106 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +000010107 xmlParseReference(ctxt);
10108 }
10109
10110 /*
10111 * Last case, text. Note that References are handled directly.
10112 */
10113 else {
10114 xmlParseCharData(ctxt, 0);
10115 }
10116
10117 GROW;
10118 /*
10119 * Pop-up of finished entities.
10120 */
Daniel Veillard561b7f82002-03-20 21:55:57 +000010121 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +000010122 xmlPopInput(ctxt);
10123 SHRINK;
10124
Daniel Veillardfdc91562002-07-01 21:52:03 +000010125 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010126 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10127 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080010128 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010129 break;
10130 }
10131 }
10132}
10133
10134/**
10135 * xmlParseElement:
10136 * @ctxt: an XML parser context
10137 *
10138 * parse an XML element, this is highly recursive
10139 *
10140 * [39] element ::= EmptyElemTag | STag content ETag
10141 *
10142 * [ WFC: Element Type Match ]
10143 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010144 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +000010145 *
Owen Taylor3473f882001-02-23 17:55:21 +000010146 */
10147
10148void
10149xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010150 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010151 const xmlChar *prefix = NULL;
10152 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010153 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010154 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010155 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010156 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010157
Daniel Veillard8915c152008-08-26 13:05:34 +000010158 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10159 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10160 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10161 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10162 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010163 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010164 return;
10165 }
10166
Owen Taylor3473f882001-02-23 17:55:21 +000010167 /* Capture start position */
10168 if (ctxt->record_info) {
10169 node_info.begin_pos = ctxt->input->consumed +
10170 (CUR_PTR - ctxt->input->base);
10171 node_info.begin_line = ctxt->input->line;
10172 }
10173
10174 if (ctxt->spaceNr == 0)
10175 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010176 else if (*ctxt->space == -2)
10177 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010178 else
10179 spacePush(ctxt, *ctxt->space);
10180
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010181 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010182#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010183 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010184#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010185 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010186#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010187 else
10188 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010189#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010190 if (ctxt->instate == XML_PARSER_EOF)
10191 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010192 if (name == NULL) {
10193 spacePop(ctxt);
10194 return;
10195 }
10196 namePush(ctxt, name);
10197 ret = ctxt->node;
10198
Daniel Veillard4432df22003-09-28 18:58:27 +000010199#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010200 /*
10201 * [ VC: Root Element Type ]
10202 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010203 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010204 */
10205 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10206 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10207 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010208#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010209
10210 /*
10211 * Check for an Empty Element.
10212 */
10213 if ((RAW == '/') && (NXT(1) == '>')) {
10214 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010215 if (ctxt->sax2) {
10216 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10217 (!ctxt->disableSAX))
10218 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010219#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010220 } else {
10221 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10222 (!ctxt->disableSAX))
10223 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010224#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010225 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010226 namePop(ctxt);
10227 spacePop(ctxt);
10228 if (nsNr != ctxt->nsNr)
10229 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010230 if ( ret != NULL && ctxt->record_info ) {
10231 node_info.end_pos = ctxt->input->consumed +
10232 (CUR_PTR - ctxt->input->base);
10233 node_info.end_line = ctxt->input->line;
10234 node_info.node = ret;
10235 xmlParserAddNodeInfo(ctxt, &node_info);
10236 }
10237 return;
10238 }
10239 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010240 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010241 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010242 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10243 "Couldn't find end of Start Tag %s line %d\n",
10244 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010245
10246 /*
10247 * end of parsing of this node.
10248 */
10249 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010250 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010251 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010252 if (nsNr != ctxt->nsNr)
10253 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010254
10255 /*
10256 * Capture end position and add node
10257 */
10258 if ( ret != NULL && ctxt->record_info ) {
10259 node_info.end_pos = ctxt->input->consumed +
10260 (CUR_PTR - ctxt->input->base);
10261 node_info.end_line = ctxt->input->line;
10262 node_info.node = ret;
10263 xmlParserAddNodeInfo(ctxt, &node_info);
10264 }
10265 return;
10266 }
10267
10268 /*
10269 * Parse the content of the element:
10270 */
10271 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010272 if (ctxt->instate == XML_PARSER_EOF)
10273 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010274 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010275 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010276 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010277 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010278
10279 /*
10280 * end of parsing of this node.
10281 */
10282 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010283 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010284 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010285 if (nsNr != ctxt->nsNr)
10286 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010287 return;
10288 }
10289
10290 /*
10291 * parse the end of tag: '</' should be here.
10292 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010293 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010294 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010295 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010296 }
10297#ifdef LIBXML_SAX1_ENABLED
10298 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010299 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010300#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010301
10302 /*
10303 * Capture end position and add node
10304 */
10305 if ( ret != NULL && ctxt->record_info ) {
10306 node_info.end_pos = ctxt->input->consumed +
10307 (CUR_PTR - ctxt->input->base);
10308 node_info.end_line = ctxt->input->line;
10309 node_info.node = ret;
10310 xmlParserAddNodeInfo(ctxt, &node_info);
10311 }
10312}
10313
10314/**
10315 * xmlParseVersionNum:
10316 * @ctxt: an XML parser context
10317 *
10318 * parse the XML version value.
10319 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010320 * [26] VersionNum ::= '1.' [0-9]+
10321 *
10322 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010323 *
10324 * Returns the string giving the XML version number, or NULL
10325 */
10326xmlChar *
10327xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10328 xmlChar *buf = NULL;
10329 int len = 0;
10330 int size = 10;
10331 xmlChar cur;
10332
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010333 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010334 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010335 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010336 return(NULL);
10337 }
10338 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010339 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010340 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010341 return(NULL);
10342 }
10343 buf[len++] = cur;
10344 NEXT;
10345 cur=CUR;
10346 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010347 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010348 return(NULL);
10349 }
10350 buf[len++] = cur;
10351 NEXT;
10352 cur=CUR;
10353 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010354 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010355 xmlChar *tmp;
10356
Owen Taylor3473f882001-02-23 17:55:21 +000010357 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010358 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10359 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010360 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010361 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010362 return(NULL);
10363 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010364 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010365 }
10366 buf[len++] = cur;
10367 NEXT;
10368 cur=CUR;
10369 }
10370 buf[len] = 0;
10371 return(buf);
10372}
10373
10374/**
10375 * xmlParseVersionInfo:
10376 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010377 *
Owen Taylor3473f882001-02-23 17:55:21 +000010378 * parse the XML version.
10379 *
10380 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010381 *
Owen Taylor3473f882001-02-23 17:55:21 +000010382 * [25] Eq ::= S? '=' S?
10383 *
10384 * Returns the version string, e.g. "1.0"
10385 */
10386
10387xmlChar *
10388xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10389 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010390
Daniel Veillarda07050d2003-10-19 14:46:32 +000010391 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010392 SKIP(7);
10393 SKIP_BLANKS;
10394 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010395 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010396 return(NULL);
10397 }
10398 NEXT;
10399 SKIP_BLANKS;
10400 if (RAW == '"') {
10401 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010402 version = xmlParseVersionNum(ctxt);
10403 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010404 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010405 } else
10406 NEXT;
10407 } else if (RAW == '\''){
10408 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010409 version = xmlParseVersionNum(ctxt);
10410 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010411 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010412 } else
10413 NEXT;
10414 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010415 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010416 }
10417 }
10418 return(version);
10419}
10420
10421/**
10422 * xmlParseEncName:
10423 * @ctxt: an XML parser context
10424 *
10425 * parse the XML encoding name
10426 *
10427 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10428 *
10429 * Returns the encoding name value or NULL
10430 */
10431xmlChar *
10432xmlParseEncName(xmlParserCtxtPtr ctxt) {
10433 xmlChar *buf = NULL;
10434 int len = 0;
10435 int size = 10;
10436 xmlChar cur;
10437
10438 cur = CUR;
10439 if (((cur >= 'a') && (cur <= 'z')) ||
10440 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010441 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010442 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010443 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010444 return(NULL);
10445 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010446
Owen Taylor3473f882001-02-23 17:55:21 +000010447 buf[len++] = cur;
10448 NEXT;
10449 cur = CUR;
10450 while (((cur >= 'a') && (cur <= 'z')) ||
10451 ((cur >= 'A') && (cur <= 'Z')) ||
10452 ((cur >= '0') && (cur <= '9')) ||
10453 (cur == '.') || (cur == '_') ||
10454 (cur == '-')) {
10455 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010456 xmlChar *tmp;
10457
Owen Taylor3473f882001-02-23 17:55:21 +000010458 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010459 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10460 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010461 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010462 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010463 return(NULL);
10464 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010465 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010466 }
10467 buf[len++] = cur;
10468 NEXT;
10469 cur = CUR;
10470 if (cur == 0) {
10471 SHRINK;
10472 GROW;
10473 cur = CUR;
10474 }
10475 }
10476 buf[len] = 0;
10477 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010478 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010479 }
10480 return(buf);
10481}
10482
10483/**
10484 * xmlParseEncodingDecl:
10485 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010486 *
Owen Taylor3473f882001-02-23 17:55:21 +000010487 * parse the XML encoding declaration
10488 *
10489 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10490 *
10491 * this setups the conversion filters.
10492 *
10493 * Returns the encoding value or NULL
10494 */
10495
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010496const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010497xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10498 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010499
10500 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010501 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010502 SKIP(8);
10503 SKIP_BLANKS;
10504 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010505 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010506 return(NULL);
10507 }
10508 NEXT;
10509 SKIP_BLANKS;
10510 if (RAW == '"') {
10511 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010512 encoding = xmlParseEncName(ctxt);
10513 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010515 xmlFree((xmlChar *) encoding);
10516 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010517 } else
10518 NEXT;
10519 } else if (RAW == '\''){
10520 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010521 encoding = xmlParseEncName(ctxt);
10522 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010523 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010524 xmlFree((xmlChar *) encoding);
10525 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010526 } else
10527 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010528 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010529 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010530 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010531
10532 /*
10533 * Non standard parsing, allowing the user to ignore encoding
10534 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010535 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10536 xmlFree((xmlChar *) encoding);
10537 return(NULL);
10538 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010539
Daniel Veillard6b621b82003-08-11 15:03:34 +000010540 /*
10541 * UTF-16 encoding stwich has already taken place at this stage,
10542 * more over the little-endian/big-endian selection is already done
10543 */
10544 if ((encoding != NULL) &&
10545 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10546 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010547 /*
10548 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010549 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010550 * document is apparently UTF-8 compatible, then raise an
10551 * encoding mismatch fatal error
10552 */
10553 if ((ctxt->encoding == NULL) &&
10554 (ctxt->input->buf != NULL) &&
10555 (ctxt->input->buf->encoder == NULL)) {
10556 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10557 "Document labelled UTF-16 but has UTF-8 content\n");
10558 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010559 if (ctxt->encoding != NULL)
10560 xmlFree((xmlChar *) ctxt->encoding);
10561 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010562 }
10563 /*
10564 * UTF-8 encoding is handled natively
10565 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010566 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010567 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10568 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010569 if (ctxt->encoding != NULL)
10570 xmlFree((xmlChar *) ctxt->encoding);
10571 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010572 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010573 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010574 xmlCharEncodingHandlerPtr handler;
10575
10576 if (ctxt->input->encoding != NULL)
10577 xmlFree((xmlChar *) ctxt->input->encoding);
10578 ctxt->input->encoding = encoding;
10579
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010580 handler = xmlFindCharEncodingHandler((const char *) encoding);
10581 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010582 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10583 /* failed to convert */
10584 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10585 return(NULL);
10586 }
Owen Taylor3473f882001-02-23 17:55:21 +000010587 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010588 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010589 "Unsupported encoding %s\n", encoding);
10590 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010591 }
10592 }
10593 }
10594 return(encoding);
10595}
10596
10597/**
10598 * xmlParseSDDecl:
10599 * @ctxt: an XML parser context
10600 *
10601 * parse the XML standalone declaration
10602 *
10603 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010604 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010605 *
10606 * [ VC: Standalone Document Declaration ]
10607 * TODO The standalone document declaration must have the value "no"
10608 * if any external markup declarations contain declarations of:
10609 * - attributes with default values, if elements to which these
10610 * attributes apply appear in the document without specifications
10611 * of values for these attributes, or
10612 * - entities (other than amp, lt, gt, apos, quot), if references
10613 * to those entities appear in the document, or
10614 * - attributes with values subject to normalization, where the
10615 * attribute appears in the document with a value which will change
10616 * as a result of normalization, or
10617 * - element types with element content, if white space occurs directly
10618 * within any instance of those types.
10619 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010620 * Returns:
10621 * 1 if standalone="yes"
10622 * 0 if standalone="no"
10623 * -2 if standalone attribute is missing or invalid
10624 * (A standalone value of -2 means that the XML declaration was found,
10625 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010626 */
10627
10628int
10629xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010630 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010631
10632 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010633 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010634 SKIP(10);
10635 SKIP_BLANKS;
10636 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010637 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010638 return(standalone);
10639 }
10640 NEXT;
10641 SKIP_BLANKS;
10642 if (RAW == '\''){
10643 NEXT;
10644 if ((RAW == 'n') && (NXT(1) == 'o')) {
10645 standalone = 0;
10646 SKIP(2);
10647 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10648 (NXT(2) == 's')) {
10649 standalone = 1;
10650 SKIP(3);
10651 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010652 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010653 }
10654 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010655 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010656 } else
10657 NEXT;
10658 } else if (RAW == '"'){
10659 NEXT;
10660 if ((RAW == 'n') && (NXT(1) == 'o')) {
10661 standalone = 0;
10662 SKIP(2);
10663 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10664 (NXT(2) == 's')) {
10665 standalone = 1;
10666 SKIP(3);
10667 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010668 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010669 }
10670 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010671 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010672 } else
10673 NEXT;
10674 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010675 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010676 }
10677 }
10678 return(standalone);
10679}
10680
10681/**
10682 * xmlParseXMLDecl:
10683 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010684 *
Owen Taylor3473f882001-02-23 17:55:21 +000010685 * parse an XML declaration header
10686 *
10687 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10688 */
10689
10690void
10691xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10692 xmlChar *version;
10693
10694 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010695 * This value for standalone indicates that the document has an
10696 * XML declaration but it does not have a standalone attribute.
10697 * It will be overwritten later if a standalone attribute is found.
10698 */
10699 ctxt->input->standalone = -2;
10700
10701 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010702 * We know that '<?xml' is here.
10703 */
10704 SKIP(5);
10705
William M. Brack76e95df2003-10-18 16:20:14 +000010706 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10708 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010709 }
10710 SKIP_BLANKS;
10711
10712 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010713 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010714 */
10715 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010716 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010717 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010718 } else {
10719 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10720 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010721 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010722 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010723 if (ctxt->options & XML_PARSE_OLD10) {
10724 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10725 "Unsupported version '%s'\n",
10726 version);
10727 } else {
10728 if ((version[0] == '1') && ((version[1] == '.'))) {
10729 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10730 "Unsupported version '%s'\n",
10731 version, NULL);
10732 } else {
10733 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10734 "Unsupported version '%s'\n",
10735 version);
10736 }
10737 }
Daniel Veillard19840942001-11-29 16:11:38 +000010738 }
10739 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010740 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010741 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010742 }
Owen Taylor3473f882001-02-23 17:55:21 +000010743
10744 /*
10745 * We may have the encoding declaration
10746 */
William M. Brack76e95df2003-10-18 16:20:14 +000010747 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010748 if ((RAW == '?') && (NXT(1) == '>')) {
10749 SKIP(2);
10750 return;
10751 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010752 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010753 }
10754 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010755 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10756 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010757 /*
10758 * The XML REC instructs us to stop parsing right here
10759 */
10760 return;
10761 }
10762
10763 /*
10764 * We may have the standalone status.
10765 */
William M. Brack76e95df2003-10-18 16:20:14 +000010766 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010767 if ((RAW == '?') && (NXT(1) == '>')) {
10768 SKIP(2);
10769 return;
10770 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010771 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010772 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010773
10774 /*
10775 * We can grow the input buffer freely at that point
10776 */
10777 GROW;
10778
Owen Taylor3473f882001-02-23 17:55:21 +000010779 SKIP_BLANKS;
10780 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10781
10782 SKIP_BLANKS;
10783 if ((RAW == '?') && (NXT(1) == '>')) {
10784 SKIP(2);
10785 } else if (RAW == '>') {
10786 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010787 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010788 NEXT;
10789 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010790 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010791 MOVETO_ENDTAG(CUR_PTR);
10792 NEXT;
10793 }
10794}
10795
10796/**
10797 * xmlParseMisc:
10798 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010799 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010800 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010801 *
10802 * [27] Misc ::= Comment | PI | S
10803 */
10804
10805void
10806xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010807 while ((ctxt->instate != XML_PARSER_EOF) &&
10808 (((RAW == '<') && (NXT(1) == '?')) ||
10809 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10810 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010811 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010812 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010813 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010814 NEXT;
10815 } else
10816 xmlParseComment(ctxt);
10817 }
10818}
10819
10820/**
10821 * xmlParseDocument:
10822 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010823 *
Owen Taylor3473f882001-02-23 17:55:21 +000010824 * parse an XML document (and build a tree if using the standard SAX
10825 * interface).
10826 *
10827 * [1] document ::= prolog element Misc*
10828 *
10829 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10830 *
10831 * Returns 0, -1 in case of error. the parser context is augmented
10832 * as a result of the parsing.
10833 */
10834
10835int
10836xmlParseDocument(xmlParserCtxtPtr ctxt) {
10837 xmlChar start[4];
10838 xmlCharEncoding enc;
10839
10840 xmlInitParser();
10841
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010842 if ((ctxt == NULL) || (ctxt->input == NULL))
10843 return(-1);
10844
Owen Taylor3473f882001-02-23 17:55:21 +000010845 GROW;
10846
10847 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010848 * SAX: detecting the level.
10849 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010850 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010851
10852 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010853 * SAX: beginning of the document processing.
10854 */
10855 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10856 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010857 if (ctxt->instate == XML_PARSER_EOF)
10858 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010859
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010860 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010861 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010862 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010863 * Get the 4 first bytes and decode the charset
10864 * if enc != XML_CHAR_ENCODING_NONE
10865 * plug some encoding conversion routines.
10866 */
10867 start[0] = RAW;
10868 start[1] = NXT(1);
10869 start[2] = NXT(2);
10870 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010871 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010872 if (enc != XML_CHAR_ENCODING_NONE) {
10873 xmlSwitchEncoding(ctxt, enc);
10874 }
Owen Taylor3473f882001-02-23 17:55:21 +000010875 }
10876
10877
10878 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010879 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010880 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010881 }
10882
10883 /*
10884 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010885 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010886 * than just the first line, unless the amount of data is really
10887 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010888 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010889 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10890 GROW;
10891 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010892 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010893
10894 /*
10895 * Note that we will switch encoding on the fly.
10896 */
10897 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010898 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10899 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010900 /*
10901 * The XML REC instructs us to stop parsing right here
10902 */
10903 return(-1);
10904 }
10905 ctxt->standalone = ctxt->input->standalone;
10906 SKIP_BLANKS;
10907 } else {
10908 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10909 }
10910 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10911 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010912 if (ctxt->instate == XML_PARSER_EOF)
10913 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010914 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10915 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10916 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10917 }
Owen Taylor3473f882001-02-23 17:55:21 +000010918
10919 /*
10920 * The Misc part of the Prolog
10921 */
10922 GROW;
10923 xmlParseMisc(ctxt);
10924
10925 /*
10926 * Then possibly doc type declaration(s) and more Misc
10927 * (doctypedecl Misc*)?
10928 */
10929 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010930 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010931
10932 ctxt->inSubset = 1;
10933 xmlParseDocTypeDecl(ctxt);
10934 if (RAW == '[') {
10935 ctxt->instate = XML_PARSER_DTD;
10936 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010937 if (ctxt->instate == XML_PARSER_EOF)
10938 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010939 }
10940
10941 /*
10942 * Create and update the external subset.
10943 */
10944 ctxt->inSubset = 2;
10945 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10946 (!ctxt->disableSAX))
10947 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10948 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010949 if (ctxt->instate == XML_PARSER_EOF)
10950 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010951 ctxt->inSubset = 0;
10952
Daniel Veillardac4118d2008-01-11 05:27:32 +000010953 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010954
10955 ctxt->instate = XML_PARSER_PROLOG;
10956 xmlParseMisc(ctxt);
10957 }
10958
10959 /*
10960 * Time to start parsing the tree itself
10961 */
10962 GROW;
10963 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010964 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10965 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010966 } else {
10967 ctxt->instate = XML_PARSER_CONTENT;
10968 xmlParseElement(ctxt);
10969 ctxt->instate = XML_PARSER_EPILOG;
10970
10971
10972 /*
10973 * The Misc part at the end
10974 */
10975 xmlParseMisc(ctxt);
10976
Daniel Veillard561b7f82002-03-20 21:55:57 +000010977 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010978 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010979 }
10980 ctxt->instate = XML_PARSER_EOF;
10981 }
10982
10983 /*
10984 * SAX: end of the document processing.
10985 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010986 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010987 ctxt->sax->endDocument(ctxt->userData);
10988
Daniel Veillard5997aca2002-03-18 18:36:20 +000010989 /*
10990 * Remove locally kept entity definitions if the tree was not built
10991 */
10992 if ((ctxt->myDoc != NULL) &&
10993 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10994 xmlFreeDoc(ctxt->myDoc);
10995 ctxt->myDoc = NULL;
10996 }
10997
Daniel Veillardae0765b2008-07-31 19:54:59 +000010998 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10999 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
11000 if (ctxt->valid)
11001 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
11002 if (ctxt->nsWellFormed)
11003 ctxt->myDoc->properties |= XML_DOC_NSVALID;
11004 if (ctxt->options & XML_PARSE_OLD10)
11005 ctxt->myDoc->properties |= XML_DOC_OLD10;
11006 }
Daniel Veillardc7612992002-02-17 22:47:37 +000011007 if (! ctxt->wellFormed) {
11008 ctxt->valid = 0;
11009 return(-1);
11010 }
Owen Taylor3473f882001-02-23 17:55:21 +000011011 return(0);
11012}
11013
11014/**
11015 * xmlParseExtParsedEnt:
11016 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011017 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011018 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000011019 * An external general parsed entity is well-formed if it matches the
11020 * production labeled extParsedEnt.
11021 *
11022 * [78] extParsedEnt ::= TextDecl? content
11023 *
11024 * Returns 0, -1 in case of error. the parser context is augmented
11025 * as a result of the parsing.
11026 */
11027
11028int
11029xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11030 xmlChar start[4];
11031 xmlCharEncoding enc;
11032
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011033 if ((ctxt == NULL) || (ctxt->input == NULL))
11034 return(-1);
11035
Owen Taylor3473f882001-02-23 17:55:21 +000011036 xmlDefaultSAXHandlerInit();
11037
Daniel Veillard309f81d2003-09-23 09:02:53 +000011038 xmlDetectSAX2(ctxt);
11039
Owen Taylor3473f882001-02-23 17:55:21 +000011040 GROW;
11041
11042 /*
11043 * SAX: beginning of the document processing.
11044 */
11045 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11046 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11047
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011048 /*
Owen Taylor3473f882001-02-23 17:55:21 +000011049 * Get the 4 first bytes and decode the charset
11050 * if enc != XML_CHAR_ENCODING_NONE
11051 * plug some encoding conversion routines.
11052 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011053 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11054 start[0] = RAW;
11055 start[1] = NXT(1);
11056 start[2] = NXT(2);
11057 start[3] = NXT(3);
11058 enc = xmlDetectCharEncoding(start, 4);
11059 if (enc != XML_CHAR_ENCODING_NONE) {
11060 xmlSwitchEncoding(ctxt, enc);
11061 }
Owen Taylor3473f882001-02-23 17:55:21 +000011062 }
11063
11064
11065 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011066 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011067 }
11068
11069 /*
11070 * Check for the XMLDecl in the Prolog.
11071 */
11072 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000011073 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011074
11075 /*
11076 * Note that we will switch encoding on the fly.
11077 */
11078 xmlParseXMLDecl(ctxt);
11079 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11080 /*
11081 * The XML REC instructs us to stop parsing right here
11082 */
11083 return(-1);
11084 }
11085 SKIP_BLANKS;
11086 } else {
11087 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11088 }
11089 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11090 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011091 if (ctxt->instate == XML_PARSER_EOF)
11092 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000011093
11094 /*
11095 * Doing validity checking on chunk doesn't make sense
11096 */
11097 ctxt->instate = XML_PARSER_CONTENT;
11098 ctxt->validate = 0;
11099 ctxt->loadsubset = 0;
11100 ctxt->depth = 0;
11101
11102 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011103 if (ctxt->instate == XML_PARSER_EOF)
11104 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011105
Owen Taylor3473f882001-02-23 17:55:21 +000011106 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011107 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011108 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011109 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011110 }
11111
11112 /*
11113 * SAX: end of the document processing.
11114 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011115 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011116 ctxt->sax->endDocument(ctxt->userData);
11117
11118 if (! ctxt->wellFormed) return(-1);
11119 return(0);
11120}
11121
Daniel Veillard73b013f2003-09-30 12:36:01 +000011122#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011123/************************************************************************
11124 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011125 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000011126 * *
11127 ************************************************************************/
11128
11129/**
11130 * xmlParseLookupSequence:
11131 * @ctxt: an XML parser context
11132 * @first: the first char to lookup
11133 * @next: the next char to lookup or zero
11134 * @third: the next char to lookup or zero
11135 *
11136 * Try to find if a sequence (first, next, third) or just (first next) or
11137 * (first) is available in the input stream.
11138 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11139 * to avoid rescanning sequences of bytes, it DOES change the state of the
11140 * parser, do not use liberally.
11141 *
11142 * Returns the index to the current parsing point if the full sequence
11143 * is available, -1 otherwise.
11144 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011145static int
Owen Taylor3473f882001-02-23 17:55:21 +000011146xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11147 xmlChar next, xmlChar third) {
11148 int base, len;
11149 xmlParserInputPtr in;
11150 const xmlChar *buf;
11151
11152 in = ctxt->input;
11153 if (in == NULL) return(-1);
11154 base = in->cur - in->base;
11155 if (base < 0) return(-1);
11156 if (ctxt->checkIndex > base)
11157 base = ctxt->checkIndex;
11158 if (in->buf == NULL) {
11159 buf = in->base;
11160 len = in->length;
11161 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011162 buf = xmlBufContent(in->buf->buffer);
11163 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011164 }
11165 /* take into account the sequence length */
11166 if (third) len -= 2;
11167 else if (next) len --;
11168 for (;base < len;base++) {
11169 if (buf[base] == first) {
11170 if (third != 0) {
11171 if ((buf[base + 1] != next) ||
11172 (buf[base + 2] != third)) continue;
11173 } else if (next != 0) {
11174 if (buf[base + 1] != next) continue;
11175 }
11176 ctxt->checkIndex = 0;
11177#ifdef DEBUG_PUSH
11178 if (next == 0)
11179 xmlGenericError(xmlGenericErrorContext,
11180 "PP: lookup '%c' found at %d\n",
11181 first, base);
11182 else if (third == 0)
11183 xmlGenericError(xmlGenericErrorContext,
11184 "PP: lookup '%c%c' found at %d\n",
11185 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011186 else
Owen Taylor3473f882001-02-23 17:55:21 +000011187 xmlGenericError(xmlGenericErrorContext,
11188 "PP: lookup '%c%c%c' found at %d\n",
11189 first, next, third, base);
11190#endif
11191 return(base - (in->cur - in->base));
11192 }
11193 }
11194 ctxt->checkIndex = base;
11195#ifdef DEBUG_PUSH
11196 if (next == 0)
11197 xmlGenericError(xmlGenericErrorContext,
11198 "PP: lookup '%c' failed\n", first);
11199 else if (third == 0)
11200 xmlGenericError(xmlGenericErrorContext,
11201 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011202 else
Owen Taylor3473f882001-02-23 17:55:21 +000011203 xmlGenericError(xmlGenericErrorContext,
11204 "PP: lookup '%c%c%c' failed\n", first, next, third);
11205#endif
11206 return(-1);
11207}
11208
11209/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011210 * xmlParseGetLasts:
11211 * @ctxt: an XML parser context
11212 * @lastlt: pointer to store the last '<' from the input
11213 * @lastgt: pointer to store the last '>' from the input
11214 *
11215 * Lookup the last < and > in the current chunk
11216 */
11217static void
11218xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11219 const xmlChar **lastgt) {
11220 const xmlChar *tmp;
11221
11222 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11223 xmlGenericError(xmlGenericErrorContext,
11224 "Internal error: xmlParseGetLasts\n");
11225 return;
11226 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011227 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011228 tmp = ctxt->input->end;
11229 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011230 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011231 if (tmp < ctxt->input->base) {
11232 *lastlt = NULL;
11233 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011234 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011235 *lastlt = tmp;
11236 tmp++;
11237 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11238 if (*tmp == '\'') {
11239 tmp++;
11240 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11241 if (tmp < ctxt->input->end) tmp++;
11242 } else if (*tmp == '"') {
11243 tmp++;
11244 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11245 if (tmp < ctxt->input->end) tmp++;
11246 } else
11247 tmp++;
11248 }
11249 if (tmp < ctxt->input->end)
11250 *lastgt = tmp;
11251 else {
11252 tmp = *lastlt;
11253 tmp--;
11254 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11255 if (tmp >= ctxt->input->base)
11256 *lastgt = tmp;
11257 else
11258 *lastgt = NULL;
11259 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011260 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011261 } else {
11262 *lastlt = NULL;
11263 *lastgt = NULL;
11264 }
11265}
11266/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011267 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011268 * @cur: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011269 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011270 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011271 *
11272 * Check that the block of characters is okay as SCdata content [20]
11273 *
11274 * Returns the number of bytes to pass if okay, a negative index where an
11275 * UTF-8 error occured otherwise
11276 */
11277static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011278xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011279 int ix;
11280 unsigned char c;
11281 int codepoint;
11282
11283 if ((utf == NULL) || (len <= 0))
11284 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011285
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011286 for (ix = 0; ix < len;) { /* string is 0-terminated */
11287 c = utf[ix];
11288 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11289 if (c >= 0x20)
11290 ix++;
11291 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11292 ix++;
11293 else
11294 return(-ix);
11295 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011296 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011297 if ((utf[ix+1] & 0xc0 ) != 0x80)
11298 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011299 codepoint = (utf[ix] & 0x1f) << 6;
11300 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011301 if (!xmlIsCharQ(codepoint))
11302 return(-ix);
11303 ix += 2;
11304 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011305 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011306 if (((utf[ix+1] & 0xc0) != 0x80) ||
11307 ((utf[ix+2] & 0xc0) != 0x80))
11308 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011309 codepoint = (utf[ix] & 0xf) << 12;
11310 codepoint |= (utf[ix+1] & 0x3f) << 6;
11311 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011312 if (!xmlIsCharQ(codepoint))
11313 return(-ix);
11314 ix += 3;
11315 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011316 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011317 if (((utf[ix+1] & 0xc0) != 0x80) ||
11318 ((utf[ix+2] & 0xc0) != 0x80) ||
11319 ((utf[ix+3] & 0xc0) != 0x80))
11320 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011321 codepoint = (utf[ix] & 0x7) << 18;
11322 codepoint |= (utf[ix+1] & 0x3f) << 12;
11323 codepoint |= (utf[ix+2] & 0x3f) << 6;
11324 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011325 if (!xmlIsCharQ(codepoint))
11326 return(-ix);
11327 ix += 4;
11328 } else /* unknown encoding */
11329 return(-ix);
11330 }
11331 return(ix);
11332}
11333
11334/**
Owen Taylor3473f882001-02-23 17:55:21 +000011335 * xmlParseTryOrFinish:
11336 * @ctxt: an XML parser context
11337 * @terminate: last chunk indicator
11338 *
11339 * Try to progress on parsing
11340 *
11341 * Returns zero if no parsing was possible
11342 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011343static int
Owen Taylor3473f882001-02-23 17:55:21 +000011344xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11345 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011346 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011347 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011348 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011349
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011350 if (ctxt->input == NULL)
11351 return(0);
11352
Owen Taylor3473f882001-02-23 17:55:21 +000011353#ifdef DEBUG_PUSH
11354 switch (ctxt->instate) {
11355 case XML_PARSER_EOF:
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: try EOF\n"); break;
11358 case XML_PARSER_START:
11359 xmlGenericError(xmlGenericErrorContext,
11360 "PP: try START\n"); break;
11361 case XML_PARSER_MISC:
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: try MISC\n");break;
11364 case XML_PARSER_COMMENT:
11365 xmlGenericError(xmlGenericErrorContext,
11366 "PP: try COMMENT\n");break;
11367 case XML_PARSER_PROLOG:
11368 xmlGenericError(xmlGenericErrorContext,
11369 "PP: try PROLOG\n");break;
11370 case XML_PARSER_START_TAG:
11371 xmlGenericError(xmlGenericErrorContext,
11372 "PP: try START_TAG\n");break;
11373 case XML_PARSER_CONTENT:
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: try CONTENT\n");break;
11376 case XML_PARSER_CDATA_SECTION:
11377 xmlGenericError(xmlGenericErrorContext,
11378 "PP: try CDATA_SECTION\n");break;
11379 case XML_PARSER_END_TAG:
11380 xmlGenericError(xmlGenericErrorContext,
11381 "PP: try END_TAG\n");break;
11382 case XML_PARSER_ENTITY_DECL:
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: try ENTITY_DECL\n");break;
11385 case XML_PARSER_ENTITY_VALUE:
11386 xmlGenericError(xmlGenericErrorContext,
11387 "PP: try ENTITY_VALUE\n");break;
11388 case XML_PARSER_ATTRIBUTE_VALUE:
11389 xmlGenericError(xmlGenericErrorContext,
11390 "PP: try ATTRIBUTE_VALUE\n");break;
11391 case XML_PARSER_DTD:
11392 xmlGenericError(xmlGenericErrorContext,
11393 "PP: try DTD\n");break;
11394 case XML_PARSER_EPILOG:
11395 xmlGenericError(xmlGenericErrorContext,
11396 "PP: try EPILOG\n");break;
11397 case XML_PARSER_PI:
11398 xmlGenericError(xmlGenericErrorContext,
11399 "PP: try PI\n");break;
11400 case XML_PARSER_IGNORE:
11401 xmlGenericError(xmlGenericErrorContext,
11402 "PP: try IGNORE\n");break;
11403 }
11404#endif
11405
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011406 if ((ctxt->input != NULL) &&
11407 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011408 xmlSHRINK(ctxt);
11409 ctxt->checkIndex = 0;
11410 }
11411 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011412
Daniel Veillarde50ba812013-04-11 15:54:51 +080011413 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011414 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011415 return(0);
11416
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011417
Owen Taylor3473f882001-02-23 17:55:21 +000011418 /*
11419 * Pop-up of finished entities.
11420 */
11421 while ((RAW == 0) && (ctxt->inputNr > 1))
11422 xmlPopInput(ctxt);
11423
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011424 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011425 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011426 avail = ctxt->input->length -
11427 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011428 else {
11429 /*
11430 * If we are operating on converted input, try to flush
11431 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011432 * buffer. But do not do this in document start where
11433 * encoding="..." may not have been read and we work on a
11434 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011435 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011436 if ((ctxt->instate != XML_PARSER_START) &&
11437 (ctxt->input->buf->raw != NULL) &&
11438 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011439 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11440 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011441 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011442
11443 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011444 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11445 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011446 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011447 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011448 (ctxt->input->cur - ctxt->input->base);
11449 }
Owen Taylor3473f882001-02-23 17:55:21 +000011450 if (avail < 1)
11451 goto done;
11452 switch (ctxt->instate) {
11453 case XML_PARSER_EOF:
11454 /*
11455 * Document parsing is done !
11456 */
11457 goto done;
11458 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011459 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11460 xmlChar start[4];
11461 xmlCharEncoding enc;
11462
11463 /*
11464 * Very first chars read from the document flow.
11465 */
11466 if (avail < 4)
11467 goto done;
11468
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011469 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011470 * Get the 4 first bytes and decode the charset
11471 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011472 * plug some encoding conversion routines,
11473 * else xmlSwitchEncoding will set to (default)
11474 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011475 */
11476 start[0] = RAW;
11477 start[1] = NXT(1);
11478 start[2] = NXT(2);
11479 start[3] = NXT(3);
11480 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011481 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011482 break;
11483 }
Owen Taylor3473f882001-02-23 17:55:21 +000011484
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011485 if (avail < 2)
11486 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011487 cur = ctxt->input->cur[0];
11488 next = ctxt->input->cur[1];
11489 if (cur == 0) {
11490 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11491 ctxt->sax->setDocumentLocator(ctxt->userData,
11492 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011493 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011494 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011495#ifdef DEBUG_PUSH
11496 xmlGenericError(xmlGenericErrorContext,
11497 "PP: entering EOF\n");
11498#endif
11499 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11500 ctxt->sax->endDocument(ctxt->userData);
11501 goto done;
11502 }
11503 if ((cur == '<') && (next == '?')) {
11504 /* PI or XML decl */
11505 if (avail < 5) return(ret);
11506 if ((!terminate) &&
11507 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11508 return(ret);
11509 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11510 ctxt->sax->setDocumentLocator(ctxt->userData,
11511 &xmlDefaultSAXLocator);
11512 if ((ctxt->input->cur[2] == 'x') &&
11513 (ctxt->input->cur[3] == 'm') &&
11514 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011515 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011516 ret += 5;
11517#ifdef DEBUG_PUSH
11518 xmlGenericError(xmlGenericErrorContext,
11519 "PP: Parsing XML Decl\n");
11520#endif
11521 xmlParseXMLDecl(ctxt);
11522 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11523 /*
11524 * The XML REC instructs us to stop parsing right
11525 * here
11526 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011527 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011528 return(0);
11529 }
11530 ctxt->standalone = ctxt->input->standalone;
11531 if ((ctxt->encoding == NULL) &&
11532 (ctxt->input->encoding != NULL))
11533 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11534 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11535 (!ctxt->disableSAX))
11536 ctxt->sax->startDocument(ctxt->userData);
11537 ctxt->instate = XML_PARSER_MISC;
11538#ifdef DEBUG_PUSH
11539 xmlGenericError(xmlGenericErrorContext,
11540 "PP: entering MISC\n");
11541#endif
11542 } else {
11543 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11544 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11545 (!ctxt->disableSAX))
11546 ctxt->sax->startDocument(ctxt->userData);
11547 ctxt->instate = XML_PARSER_MISC;
11548#ifdef DEBUG_PUSH
11549 xmlGenericError(xmlGenericErrorContext,
11550 "PP: entering MISC\n");
11551#endif
11552 }
11553 } else {
11554 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11555 ctxt->sax->setDocumentLocator(ctxt->userData,
11556 &xmlDefaultSAXLocator);
11557 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011558 if (ctxt->version == NULL) {
11559 xmlErrMemory(ctxt, NULL);
11560 break;
11561 }
Owen Taylor3473f882001-02-23 17:55:21 +000011562 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11563 (!ctxt->disableSAX))
11564 ctxt->sax->startDocument(ctxt->userData);
11565 ctxt->instate = XML_PARSER_MISC;
11566#ifdef DEBUG_PUSH
11567 xmlGenericError(xmlGenericErrorContext,
11568 "PP: entering MISC\n");
11569#endif
11570 }
11571 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011572 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011573 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011574 const xmlChar *prefix = NULL;
11575 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011576 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011577
11578 if ((avail < 2) && (ctxt->inputNr == 1))
11579 goto done;
11580 cur = ctxt->input->cur[0];
11581 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011582 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011583 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011584 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11585 ctxt->sax->endDocument(ctxt->userData);
11586 goto done;
11587 }
11588 if (!terminate) {
11589 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011590 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011591 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011592 goto done;
11593 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11594 goto done;
11595 }
11596 }
11597 if (ctxt->spaceNr == 0)
11598 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011599 else if (*ctxt->space == -2)
11600 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011601 else
11602 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011603#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011604 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011605#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011606 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011607#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011608 else
11609 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011610#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011611 if (ctxt->instate == XML_PARSER_EOF)
11612 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011613 if (name == NULL) {
11614 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011615 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011616 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11617 ctxt->sax->endDocument(ctxt->userData);
11618 goto done;
11619 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011620#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011621 /*
11622 * [ VC: Root Element Type ]
11623 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011624 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011625 */
11626 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11627 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11628 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011629#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011630
11631 /*
11632 * Check for an Empty Element.
11633 */
11634 if ((RAW == '/') && (NXT(1) == '>')) {
11635 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011636
11637 if (ctxt->sax2) {
11638 if ((ctxt->sax != NULL) &&
11639 (ctxt->sax->endElementNs != NULL) &&
11640 (!ctxt->disableSAX))
11641 ctxt->sax->endElementNs(ctxt->userData, name,
11642 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011643 if (ctxt->nsNr - nsNr > 0)
11644 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011645#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011646 } else {
11647 if ((ctxt->sax != NULL) &&
11648 (ctxt->sax->endElement != NULL) &&
11649 (!ctxt->disableSAX))
11650 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011651#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011652 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011653 if (ctxt->instate == XML_PARSER_EOF)
11654 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011655 spacePop(ctxt);
11656 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011657 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011658 } else {
11659 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011660 }
Daniel Veillard65686452012-07-19 18:25:01 +080011661 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011662 break;
11663 }
11664 if (RAW == '>') {
11665 NEXT;
11666 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011667 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011668 "Couldn't find end of Start Tag %s\n",
11669 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011670 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011671 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011672 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011673 if (ctxt->sax2)
11674 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011675#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011676 else
11677 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011678#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011679
Daniel Veillarda880b122003-04-21 21:36:41 +000011680 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011681 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011682 break;
11683 }
11684 case XML_PARSER_CONTENT: {
11685 const xmlChar *test;
11686 unsigned int cons;
11687 if ((avail < 2) && (ctxt->inputNr == 1))
11688 goto done;
11689 cur = ctxt->input->cur[0];
11690 next = ctxt->input->cur[1];
11691
11692 test = CUR_PTR;
11693 cons = ctxt->input->consumed;
11694 if ((cur == '<') && (next == '/')) {
11695 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011696 break;
11697 } else if ((cur == '<') && (next == '?')) {
11698 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011699 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11700 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011701 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011702 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011703 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011704 ctxt->instate = XML_PARSER_CONTENT;
11705 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011706 } else if ((cur == '<') && (next != '!')) {
11707 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011708 break;
11709 } else if ((cur == '<') && (next == '!') &&
11710 (ctxt->input->cur[2] == '-') &&
11711 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011712 int term;
11713
11714 if (avail < 4)
11715 goto done;
11716 ctxt->input->cur += 4;
11717 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11718 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011719 if ((!terminate) && (term < 0)) {
11720 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011721 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011722 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011723 xmlParseComment(ctxt);
11724 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011725 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011726 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11727 (ctxt->input->cur[2] == '[') &&
11728 (ctxt->input->cur[3] == 'C') &&
11729 (ctxt->input->cur[4] == 'D') &&
11730 (ctxt->input->cur[5] == 'A') &&
11731 (ctxt->input->cur[6] == 'T') &&
11732 (ctxt->input->cur[7] == 'A') &&
11733 (ctxt->input->cur[8] == '[')) {
11734 SKIP(9);
11735 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011736 break;
11737 } else if ((cur == '<') && (next == '!') &&
11738 (avail < 9)) {
11739 goto done;
11740 } else if (cur == '&') {
11741 if ((!terminate) &&
11742 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11743 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011744 xmlParseReference(ctxt);
11745 } else {
11746 /* TODO Avoid the extra copy, handle directly !!! */
11747 /*
11748 * Goal of the following test is:
11749 * - minimize calls to the SAX 'character' callback
11750 * when they are mergeable
11751 * - handle an problem for isBlank when we only parse
11752 * a sequence of blank chars and the next one is
11753 * not available to check against '<' presence.
11754 * - tries to homogenize the differences in SAX
11755 * callbacks between the push and pull versions
11756 * of the parser.
11757 */
11758 if ((ctxt->inputNr == 1) &&
11759 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11760 if (!terminate) {
11761 if (ctxt->progressive) {
11762 if ((lastlt == NULL) ||
11763 (ctxt->input->cur > lastlt))
11764 goto done;
11765 } else if (xmlParseLookupSequence(ctxt,
11766 '<', 0, 0) < 0) {
11767 goto done;
11768 }
11769 }
11770 }
11771 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011772 xmlParseCharData(ctxt, 0);
11773 }
11774 /*
11775 * Pop-up of finished entities.
11776 */
11777 while ((RAW == 0) && (ctxt->inputNr > 1))
11778 xmlPopInput(ctxt);
11779 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011780 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11781 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011782 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011783 break;
11784 }
11785 break;
11786 }
11787 case XML_PARSER_END_TAG:
11788 if (avail < 2)
11789 goto done;
11790 if (!terminate) {
11791 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011792 /* > can be found unescaped in attribute values */
11793 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011794 goto done;
11795 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11796 goto done;
11797 }
11798 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011799 if (ctxt->sax2) {
11800 xmlParseEndTag2(ctxt,
11801 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11802 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011803 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011804 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011805 }
11806#ifdef LIBXML_SAX1_ENABLED
11807 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011808 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011809#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011810 if (ctxt->instate == XML_PARSER_EOF) {
11811 /* Nothing */
11812 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011813 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011814 } else {
11815 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011816 }
11817 break;
11818 case XML_PARSER_CDATA_SECTION: {
11819 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011820 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011821 * cdataBlock merge back contiguous callbacks.
11822 */
11823 int base;
11824
11825 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11826 if (base < 0) {
11827 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011828 int tmp;
11829
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011830 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011831 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011832 if (tmp < 0) {
11833 tmp = -tmp;
11834 ctxt->input->cur += tmp;
11835 goto encoding_error;
11836 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011837 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11838 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011839 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011840 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011841 else if (ctxt->sax->characters != NULL)
11842 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011843 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011844 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011845 if (ctxt->instate == XML_PARSER_EOF)
11846 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011847 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011848 ctxt->checkIndex = 0;
11849 }
11850 goto done;
11851 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011852 int tmp;
11853
David Kilzer4f8606c2016-01-05 13:38:09 -080011854 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011855 if ((tmp < 0) || (tmp != base)) {
11856 tmp = -tmp;
11857 ctxt->input->cur += tmp;
11858 goto encoding_error;
11859 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011860 if ((ctxt->sax != NULL) && (base == 0) &&
11861 (ctxt->sax->cdataBlock != NULL) &&
11862 (!ctxt->disableSAX)) {
11863 /*
11864 * Special case to provide identical behaviour
11865 * between pull and push parsers on enpty CDATA
11866 * sections
11867 */
11868 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11869 (!strncmp((const char *)&ctxt->input->cur[-9],
11870 "<![CDATA[", 9)))
11871 ctxt->sax->cdataBlock(ctxt->userData,
11872 BAD_CAST "", 0);
11873 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011874 (!ctxt->disableSAX)) {
11875 if (ctxt->sax->cdataBlock != NULL)
11876 ctxt->sax->cdataBlock(ctxt->userData,
11877 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011878 else if (ctxt->sax->characters != NULL)
11879 ctxt->sax->characters(ctxt->userData,
11880 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011881 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011882 if (ctxt->instate == XML_PARSER_EOF)
11883 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011884 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011885 ctxt->checkIndex = 0;
11886 ctxt->instate = XML_PARSER_CONTENT;
11887#ifdef DEBUG_PUSH
11888 xmlGenericError(xmlGenericErrorContext,
11889 "PP: entering CONTENT\n");
11890#endif
11891 }
11892 break;
11893 }
Owen Taylor3473f882001-02-23 17:55:21 +000011894 case XML_PARSER_MISC:
11895 SKIP_BLANKS;
11896 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011897 avail = ctxt->input->length -
11898 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011899 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011900 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011901 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011902 if (avail < 2)
11903 goto done;
11904 cur = ctxt->input->cur[0];
11905 next = ctxt->input->cur[1];
11906 if ((cur == '<') && (next == '?')) {
11907 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011908 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11909 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011910 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011911 }
Owen Taylor3473f882001-02-23 17:55:21 +000011912#ifdef DEBUG_PUSH
11913 xmlGenericError(xmlGenericErrorContext,
11914 "PP: Parsing PI\n");
11915#endif
11916 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011917 if (ctxt->instate == XML_PARSER_EOF)
11918 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011919 ctxt->instate = XML_PARSER_MISC;
11920 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011921 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011922 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011923 (ctxt->input->cur[2] == '-') &&
11924 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011925 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011926 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11927 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011928 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011929 }
Owen Taylor3473f882001-02-23 17:55:21 +000011930#ifdef DEBUG_PUSH
11931 xmlGenericError(xmlGenericErrorContext,
11932 "PP: Parsing Comment\n");
11933#endif
11934 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011935 if (ctxt->instate == XML_PARSER_EOF)
11936 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011937 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011938 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011939 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011940 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011941 (ctxt->input->cur[2] == 'D') &&
11942 (ctxt->input->cur[3] == 'O') &&
11943 (ctxt->input->cur[4] == 'C') &&
11944 (ctxt->input->cur[5] == 'T') &&
11945 (ctxt->input->cur[6] == 'Y') &&
11946 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011947 (ctxt->input->cur[8] == 'E')) {
11948 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011949 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11950 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011951 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011952 }
Owen Taylor3473f882001-02-23 17:55:21 +000011953#ifdef DEBUG_PUSH
11954 xmlGenericError(xmlGenericErrorContext,
11955 "PP: Parsing internal subset\n");
11956#endif
11957 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011958 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011959 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011960 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011961 if (ctxt->instate == XML_PARSER_EOF)
11962 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011963 if (RAW == '[') {
11964 ctxt->instate = XML_PARSER_DTD;
11965#ifdef DEBUG_PUSH
11966 xmlGenericError(xmlGenericErrorContext,
11967 "PP: entering DTD\n");
11968#endif
11969 } else {
11970 /*
11971 * Create and update the external subset.
11972 */
11973 ctxt->inSubset = 2;
11974 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11975 (ctxt->sax->externalSubset != NULL))
11976 ctxt->sax->externalSubset(ctxt->userData,
11977 ctxt->intSubName, ctxt->extSubSystem,
11978 ctxt->extSubURI);
11979 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011980 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011981 ctxt->instate = XML_PARSER_PROLOG;
11982#ifdef DEBUG_PUSH
11983 xmlGenericError(xmlGenericErrorContext,
11984 "PP: entering PROLOG\n");
11985#endif
11986 }
11987 } else if ((cur == '<') && (next == '!') &&
11988 (avail < 9)) {
11989 goto done;
11990 } else {
11991 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011992 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011993 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011994#ifdef DEBUG_PUSH
11995 xmlGenericError(xmlGenericErrorContext,
11996 "PP: entering START_TAG\n");
11997#endif
11998 }
11999 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012000 case XML_PARSER_PROLOG:
12001 SKIP_BLANKS;
12002 if (ctxt->input->buf == NULL)
12003 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12004 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012005 avail = xmlBufUse(ctxt->input->buf->buffer) -
12006 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012007 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000012008 goto done;
12009 cur = ctxt->input->cur[0];
12010 next = ctxt->input->cur[1];
12011 if ((cur == '<') && (next == '?')) {
12012 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080012013 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12014 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000012015 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012016 }
Owen Taylor3473f882001-02-23 17:55:21 +000012017#ifdef DEBUG_PUSH
12018 xmlGenericError(xmlGenericErrorContext,
12019 "PP: Parsing PI\n");
12020#endif
12021 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012022 if (ctxt->instate == XML_PARSER_EOF)
12023 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012024 ctxt->instate = XML_PARSER_PROLOG;
12025 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012026 } else if ((cur == '<') && (next == '!') &&
12027 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12028 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080012029 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12030 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012031 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080012032 }
Owen Taylor3473f882001-02-23 17:55:21 +000012033#ifdef DEBUG_PUSH
12034 xmlGenericError(xmlGenericErrorContext,
12035 "PP: Parsing Comment\n");
12036#endif
12037 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012038 if (ctxt->instate == XML_PARSER_EOF)
12039 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012040 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080012041 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012042 } else if ((cur == '<') && (next == '!') &&
12043 (avail < 4)) {
12044 goto done;
12045 } else {
12046 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000012047 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080012048 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000012049 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000012050#ifdef DEBUG_PUSH
12051 xmlGenericError(xmlGenericErrorContext,
12052 "PP: entering START_TAG\n");
12053#endif
12054 }
12055 break;
12056 case XML_PARSER_EPILOG:
12057 SKIP_BLANKS;
12058 if (ctxt->input->buf == NULL)
12059 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12060 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012061 avail = xmlBufUse(ctxt->input->buf->buffer) -
12062 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000012063 if (avail < 2)
12064 goto done;
12065 cur = ctxt->input->cur[0];
12066 next = ctxt->input->cur[1];
12067 if ((cur == '<') && (next == '?')) {
12068 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080012069 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12070 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000012071 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012072 }
Owen Taylor3473f882001-02-23 17:55:21 +000012073#ifdef DEBUG_PUSH
12074 xmlGenericError(xmlGenericErrorContext,
12075 "PP: Parsing PI\n");
12076#endif
12077 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012078 if (ctxt->instate == XML_PARSER_EOF)
12079 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012080 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080012081 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012082 } else if ((cur == '<') && (next == '!') &&
12083 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12084 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080012085 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12086 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012087 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080012088 }
Owen Taylor3473f882001-02-23 17:55:21 +000012089#ifdef DEBUG_PUSH
12090 xmlGenericError(xmlGenericErrorContext,
12091 "PP: Parsing Comment\n");
12092#endif
12093 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012094 if (ctxt->instate == XML_PARSER_EOF)
12095 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012096 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080012097 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012098 } else if ((cur == '<') && (next == '!') &&
12099 (avail < 4)) {
12100 goto done;
12101 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012102 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080012103 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012104#ifdef DEBUG_PUSH
12105 xmlGenericError(xmlGenericErrorContext,
12106 "PP: entering EOF\n");
12107#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012108 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012109 ctxt->sax->endDocument(ctxt->userData);
12110 goto done;
12111 }
12112 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012113 case XML_PARSER_DTD: {
12114 /*
12115 * Sorry but progressive parsing of the internal subset
12116 * is not expected to be supported. We first check that
12117 * the full content of the internal subset is available and
12118 * the parsing is launched only at that point.
12119 * Internal subset ends up with "']' S? '>'" in an unescaped
12120 * section and not in a ']]>' sequence which are conditional
12121 * sections (whoever argued to keep that crap in XML deserve
12122 * a place in hell !).
12123 */
12124 int base, i;
12125 xmlChar *buf;
12126 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012127 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000012128
12129 base = ctxt->input->cur - ctxt->input->base;
12130 if (base < 0) return(0);
12131 if (ctxt->checkIndex > base)
12132 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012133 buf = xmlBufContent(ctxt->input->buf->buffer);
12134 use = xmlBufUse(ctxt->input->buf->buffer);
12135 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000012136 if (quote != 0) {
12137 if (buf[base] == quote)
12138 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012139 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000012140 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012141 if ((quote == 0) && (buf[base] == '<')) {
12142 int found = 0;
12143 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012144 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000012145 (buf[base + 1] == '!') &&
12146 (buf[base + 2] == '-') &&
12147 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012148 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000012149 if ((buf[base] == '-') &&
12150 (buf[base + 1] == '-') &&
12151 (buf[base + 2] == '>')) {
12152 found = 1;
12153 base += 2;
12154 break;
12155 }
12156 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012157 if (!found) {
12158#if 0
12159 fprintf(stderr, "unfinished comment\n");
12160#endif
12161 break; /* for */
12162 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012163 continue;
12164 }
12165 }
Owen Taylor3473f882001-02-23 17:55:21 +000012166 if (buf[base] == '"') {
12167 quote = '"';
12168 continue;
12169 }
12170 if (buf[base] == '\'') {
12171 quote = '\'';
12172 continue;
12173 }
12174 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012175#if 0
12176 fprintf(stderr, "%c%c%c%c: ", buf[base],
12177 buf[base + 1], buf[base + 2], buf[base + 3]);
12178#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012179 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012180 break;
12181 if (buf[base + 1] == ']') {
12182 /* conditional crap, skip both ']' ! */
12183 base++;
12184 continue;
12185 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012186 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012187 if (buf[base + i] == '>') {
12188#if 0
12189 fprintf(stderr, "found\n");
12190#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012191 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012192 }
12193 if (!IS_BLANK_CH(buf[base + i])) {
12194#if 0
12195 fprintf(stderr, "not found\n");
12196#endif
12197 goto not_end_of_int_subset;
12198 }
Owen Taylor3473f882001-02-23 17:55:21 +000012199 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012200#if 0
12201 fprintf(stderr, "end of stream\n");
12202#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012203 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012204
Owen Taylor3473f882001-02-23 17:55:21 +000012205 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012206not_end_of_int_subset:
12207 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012208 }
12209 /*
12210 * We didn't found the end of the Internal subset
12211 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012212 if (quote == 0)
12213 ctxt->checkIndex = base;
12214 else
12215 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012216#ifdef DEBUG_PUSH
12217 if (next == 0)
12218 xmlGenericError(xmlGenericErrorContext,
12219 "PP: lookup of int subset end filed\n");
12220#endif
12221 goto done;
12222
12223found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012224 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012225 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012226 if (ctxt->instate == XML_PARSER_EOF)
12227 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012228 ctxt->inSubset = 2;
12229 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12230 (ctxt->sax->externalSubset != NULL))
12231 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12232 ctxt->extSubSystem, ctxt->extSubURI);
12233 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012234 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012235 if (ctxt->instate == XML_PARSER_EOF)
12236 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012237 ctxt->instate = XML_PARSER_PROLOG;
12238 ctxt->checkIndex = 0;
12239#ifdef DEBUG_PUSH
12240 xmlGenericError(xmlGenericErrorContext,
12241 "PP: entering PROLOG\n");
12242#endif
12243 break;
12244 }
12245 case XML_PARSER_COMMENT:
12246 xmlGenericError(xmlGenericErrorContext,
12247 "PP: internal error, state == COMMENT\n");
12248 ctxt->instate = XML_PARSER_CONTENT;
12249#ifdef DEBUG_PUSH
12250 xmlGenericError(xmlGenericErrorContext,
12251 "PP: entering CONTENT\n");
12252#endif
12253 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012254 case XML_PARSER_IGNORE:
12255 xmlGenericError(xmlGenericErrorContext,
12256 "PP: internal error, state == IGNORE");
12257 ctxt->instate = XML_PARSER_DTD;
12258#ifdef DEBUG_PUSH
12259 xmlGenericError(xmlGenericErrorContext,
12260 "PP: entering DTD\n");
12261#endif
12262 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012263 case XML_PARSER_PI:
12264 xmlGenericError(xmlGenericErrorContext,
12265 "PP: internal error, state == PI\n");
12266 ctxt->instate = XML_PARSER_CONTENT;
12267#ifdef DEBUG_PUSH
12268 xmlGenericError(xmlGenericErrorContext,
12269 "PP: entering CONTENT\n");
12270#endif
12271 break;
12272 case XML_PARSER_ENTITY_DECL:
12273 xmlGenericError(xmlGenericErrorContext,
12274 "PP: internal error, state == ENTITY_DECL\n");
12275 ctxt->instate = XML_PARSER_DTD;
12276#ifdef DEBUG_PUSH
12277 xmlGenericError(xmlGenericErrorContext,
12278 "PP: entering DTD\n");
12279#endif
12280 break;
12281 case XML_PARSER_ENTITY_VALUE:
12282 xmlGenericError(xmlGenericErrorContext,
12283 "PP: internal error, state == ENTITY_VALUE\n");
12284 ctxt->instate = XML_PARSER_CONTENT;
12285#ifdef DEBUG_PUSH
12286 xmlGenericError(xmlGenericErrorContext,
12287 "PP: entering DTD\n");
12288#endif
12289 break;
12290 case XML_PARSER_ATTRIBUTE_VALUE:
12291 xmlGenericError(xmlGenericErrorContext,
12292 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12293 ctxt->instate = XML_PARSER_START_TAG;
12294#ifdef DEBUG_PUSH
12295 xmlGenericError(xmlGenericErrorContext,
12296 "PP: entering START_TAG\n");
12297#endif
12298 break;
12299 case XML_PARSER_SYSTEM_LITERAL:
12300 xmlGenericError(xmlGenericErrorContext,
12301 "PP: internal error, state == SYSTEM_LITERAL\n");
12302 ctxt->instate = XML_PARSER_START_TAG;
12303#ifdef DEBUG_PUSH
12304 xmlGenericError(xmlGenericErrorContext,
12305 "PP: entering START_TAG\n");
12306#endif
12307 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012308 case XML_PARSER_PUBLIC_LITERAL:
12309 xmlGenericError(xmlGenericErrorContext,
12310 "PP: internal error, state == PUBLIC_LITERAL\n");
12311 ctxt->instate = XML_PARSER_START_TAG;
12312#ifdef DEBUG_PUSH
12313 xmlGenericError(xmlGenericErrorContext,
12314 "PP: entering START_TAG\n");
12315#endif
12316 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012317 }
12318 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012319done:
Owen Taylor3473f882001-02-23 17:55:21 +000012320#ifdef DEBUG_PUSH
12321 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12322#endif
12323 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012324encoding_error:
12325 {
12326 char buffer[150];
12327
12328 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12329 ctxt->input->cur[0], ctxt->input->cur[1],
12330 ctxt->input->cur[2], ctxt->input->cur[3]);
12331 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12332 "Input is not proper UTF-8, indicate encoding !\n%s",
12333 BAD_CAST buffer, NULL);
12334 }
12335 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012336}
12337
12338/**
Daniel Veillard65686452012-07-19 18:25:01 +080012339 * xmlParseCheckTransition:
12340 * @ctxt: an XML parser context
12341 * @chunk: a char array
12342 * @size: the size in byte of the chunk
12343 *
12344 * Check depending on the current parser state if the chunk given must be
12345 * processed immediately or one need more data to advance on parsing.
12346 *
12347 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12348 */
12349static int
12350xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12351 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12352 return(-1);
12353 if (ctxt->instate == XML_PARSER_START_TAG) {
12354 if (memchr(chunk, '>', size) != NULL)
12355 return(1);
12356 return(0);
12357 }
12358 if (ctxt->progressive == XML_PARSER_COMMENT) {
12359 if (memchr(chunk, '>', size) != NULL)
12360 return(1);
12361 return(0);
12362 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012363 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12364 if (memchr(chunk, '>', size) != NULL)
12365 return(1);
12366 return(0);
12367 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012368 if (ctxt->progressive == XML_PARSER_PI) {
12369 if (memchr(chunk, '>', size) != NULL)
12370 return(1);
12371 return(0);
12372 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012373 if (ctxt->instate == XML_PARSER_END_TAG) {
12374 if (memchr(chunk, '>', size) != NULL)
12375 return(1);
12376 return(0);
12377 }
12378 if ((ctxt->progressive == XML_PARSER_DTD) ||
12379 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012380 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012381 return(1);
12382 return(0);
12383 }
Daniel Veillard65686452012-07-19 18:25:01 +080012384 return(1);
12385}
12386
12387/**
Owen Taylor3473f882001-02-23 17:55:21 +000012388 * xmlParseChunk:
12389 * @ctxt: an XML parser context
12390 * @chunk: an char array
12391 * @size: the size in byte of the chunk
12392 * @terminate: last chunk indicator
12393 *
12394 * Parse a Chunk of memory
12395 *
12396 * Returns zero if no error, the xmlParserErrors otherwise.
12397 */
12398int
12399xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12400 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012401 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012402 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012403 size_t old_avail = 0;
12404 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012405
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012406 if (ctxt == NULL)
12407 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012408 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012409 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012410 if (ctxt->instate == XML_PARSER_EOF)
12411 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012412 if (ctxt->instate == XML_PARSER_START)
12413 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012414 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12415 (chunk[size - 1] == '\r')) {
12416 end_in_lf = 1;
12417 size--;
12418 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012419
12420xmldecl_done:
12421
Owen Taylor3473f882001-02-23 17:55:21 +000012422 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12423 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12425 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012426 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012427
Daniel Veillard65686452012-07-19 18:25:01 +080012428 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012429 /*
12430 * Specific handling if we autodetected an encoding, we should not
12431 * push more than the first line ... which depend on the encoding
12432 * And only push the rest once the final encoding was detected
12433 */
12434 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12435 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012436 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012437
12438 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12439 BAD_CAST "UTF-16")) ||
12440 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12441 BAD_CAST "UTF16")))
12442 len = 90;
12443 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12444 BAD_CAST "UCS-4")) ||
12445 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12446 BAD_CAST "UCS4")))
12447 len = 180;
12448
12449 if (ctxt->input->buf->rawconsumed < len)
12450 len -= ctxt->input->buf->rawconsumed;
12451
Raul Hudeaba9716a2010-03-15 10:13:29 +010012452 /*
12453 * Change size for reading the initial declaration only
12454 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12455 * will blindly copy extra bytes from memory.
12456 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012457 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012458 remain = size - len;
12459 size = len;
12460 } else {
12461 remain = 0;
12462 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012463 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012464 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012465 if (res < 0) {
12466 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012467 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012468 return (XML_PARSER_EOF);
12469 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012470 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012471#ifdef DEBUG_PUSH
12472 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12473#endif
12474
Owen Taylor3473f882001-02-23 17:55:21 +000012475 } else if (ctxt->instate != XML_PARSER_EOF) {
12476 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12477 xmlParserInputBufferPtr in = ctxt->input->buf;
12478 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12479 (in->raw != NULL)) {
12480 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012481 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12482 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012483
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012484 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012485 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012486 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012487 xmlGenericError(xmlGenericErrorContext,
12488 "xmlParseChunk: encoder error\n");
12489 return(XML_ERR_INVALID_ENCODING);
12490 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012491 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012492 }
12493 }
12494 }
Daniel Veillard65686452012-07-19 18:25:01 +080012495 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012496 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012497 } else {
12498 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12499 avail = xmlBufUse(ctxt->input->buf->buffer);
12500 /*
12501 * Depending on the current state it may not be such
12502 * a good idea to try parsing if there is nothing in the chunk
12503 * which would be worth doing a parser state transition and we
12504 * need to wait for more data
12505 */
12506 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12507 (old_avail == 0) || (avail == 0) ||
12508 (xmlParseCheckTransition(ctxt,
12509 (const char *)&ctxt->input->base[old_avail],
12510 avail - old_avail)))
12511 xmlParseTryOrFinish(ctxt, terminate);
12512 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012513 if (ctxt->instate == XML_PARSER_EOF)
12514 return(ctxt->errNo);
12515
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012516 if ((ctxt->input != NULL) &&
12517 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12518 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12519 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12520 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012521 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012522 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012523 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12524 return(ctxt->errNo);
12525
12526 if (remain != 0) {
12527 chunk += size;
12528 size = remain;
12529 remain = 0;
12530 goto xmldecl_done;
12531 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012532 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12533 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012534 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12535 ctxt->input);
12536 size_t current = ctxt->input->cur - ctxt->input->base;
12537
Daniel Veillarda617e242006-01-09 14:38:44 +000012538 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012539
12540 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12541 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012542 }
Owen Taylor3473f882001-02-23 17:55:21 +000012543 if (terminate) {
12544 /*
12545 * Check for termination
12546 */
Daniel Veillard65686452012-07-19 18:25:01 +080012547 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012548
12549 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012550 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012551 cur_avail = ctxt->input->length -
12552 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012553 else
Daniel Veillard65686452012-07-19 18:25:01 +080012554 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12555 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012556 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012557
Owen Taylor3473f882001-02-23 17:55:21 +000012558 if ((ctxt->instate != XML_PARSER_EOF) &&
12559 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012560 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012561 }
Daniel Veillard65686452012-07-19 18:25:01 +080012562 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012563 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012564 }
Owen Taylor3473f882001-02-23 17:55:21 +000012565 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012566 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012567 ctxt->sax->endDocument(ctxt->userData);
12568 }
12569 ctxt->instate = XML_PARSER_EOF;
12570 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012571 if (ctxt->wellFormed == 0)
12572 return((xmlParserErrors) ctxt->errNo);
12573 else
12574 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012575}
12576
12577/************************************************************************
12578 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012579 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012580 * *
12581 ************************************************************************/
12582
12583/**
Owen Taylor3473f882001-02-23 17:55:21 +000012584 * xmlCreatePushParserCtxt:
12585 * @sax: a SAX handler
12586 * @user_data: The user data returned on SAX callbacks
12587 * @chunk: a pointer to an array of chars
12588 * @size: number of chars in the array
12589 * @filename: an optional file name or URI
12590 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012591 * Create a parser context for using the XML parser in push mode.
12592 * If @buffer and @size are non-NULL, the data is used to detect
12593 * the encoding. The remaining characters will be parsed so they
12594 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012595 * To allow content encoding detection, @size should be >= 4
12596 * The value of @filename is used for fetching external entities
12597 * and error/warning reports.
12598 *
12599 * Returns the new parser context or NULL
12600 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012601
Owen Taylor3473f882001-02-23 17:55:21 +000012602xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012603xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012604 const char *chunk, int size, const char *filename) {
12605 xmlParserCtxtPtr ctxt;
12606 xmlParserInputPtr inputStream;
12607 xmlParserInputBufferPtr buf;
12608 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12609
12610 /*
12611 * plug some encoding conversion routines
12612 */
12613 if ((chunk != NULL) && (size >= 4))
12614 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12615
12616 buf = xmlAllocParserInputBuffer(enc);
12617 if (buf == NULL) return(NULL);
12618
12619 ctxt = xmlNewParserCtxt();
12620 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012621 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012622 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012623 return(NULL);
12624 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012625 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012626 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12627 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012628 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012629 xmlFreeParserInputBuffer(buf);
12630 xmlFreeParserCtxt(ctxt);
12631 return(NULL);
12632 }
Owen Taylor3473f882001-02-23 17:55:21 +000012633 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012634#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012635 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012636#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012637 xmlFree(ctxt->sax);
12638 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12639 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012640 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012641 xmlFreeParserInputBuffer(buf);
12642 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012643 return(NULL);
12644 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012645 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12646 if (sax->initialized == XML_SAX2_MAGIC)
12647 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12648 else
12649 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012650 if (user_data != NULL)
12651 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012652 }
Owen Taylor3473f882001-02-23 17:55:21 +000012653 if (filename == NULL) {
12654 ctxt->directory = NULL;
12655 } else {
12656 ctxt->directory = xmlParserGetDirectory(filename);
12657 }
12658
12659 inputStream = xmlNewInputStream(ctxt);
12660 if (inputStream == NULL) {
12661 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012662 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012663 return(NULL);
12664 }
12665
12666 if (filename == NULL)
12667 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012668 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012669 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012670 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012671 if (inputStream->filename == NULL) {
12672 xmlFreeParserCtxt(ctxt);
12673 xmlFreeParserInputBuffer(buf);
12674 return(NULL);
12675 }
12676 }
Owen Taylor3473f882001-02-23 17:55:21 +000012677 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012678 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012679 inputPush(ctxt, inputStream);
12680
William M. Brack3a1cd212005-02-11 14:35:54 +000012681 /*
12682 * If the caller didn't provide an initial 'chunk' for determining
12683 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12684 * that it can be automatically determined later
12685 */
12686 if ((size == 0) || (chunk == NULL)) {
12687 ctxt->charset = XML_CHAR_ENCODING_NONE;
12688 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012689 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12690 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012691
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012692 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012693
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012694 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012695#ifdef DEBUG_PUSH
12696 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12697#endif
12698 }
12699
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012700 if (enc != XML_CHAR_ENCODING_NONE) {
12701 xmlSwitchEncoding(ctxt, enc);
12702 }
12703
Owen Taylor3473f882001-02-23 17:55:21 +000012704 return(ctxt);
12705}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012706#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012707
12708/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012709 * xmlHaltParser:
12710 * @ctxt: an XML parser context
12711 *
12712 * Blocks further parser processing don't override error
12713 * for internal use
12714 */
12715static void
12716xmlHaltParser(xmlParserCtxtPtr ctxt) {
12717 if (ctxt == NULL)
12718 return;
12719 ctxt->instate = XML_PARSER_EOF;
12720 ctxt->disableSAX = 1;
12721 if (ctxt->input != NULL) {
12722 /*
12723 * in case there was a specific allocation deallocate before
12724 * overriding base
12725 */
12726 if (ctxt->input->free != NULL) {
12727 ctxt->input->free((xmlChar *) ctxt->input->base);
12728 ctxt->input->free = NULL;
12729 }
12730 ctxt->input->cur = BAD_CAST"";
12731 ctxt->input->base = ctxt->input->cur;
12732 }
12733}
12734
12735/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012736 * xmlStopParser:
12737 * @ctxt: an XML parser context
12738 *
12739 * Blocks further parser processing
12740 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012741void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012742xmlStopParser(xmlParserCtxtPtr ctxt) {
12743 if (ctxt == NULL)
12744 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012745 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012746 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012747}
12748
12749/**
Owen Taylor3473f882001-02-23 17:55:21 +000012750 * xmlCreateIOParserCtxt:
12751 * @sax: a SAX handler
12752 * @user_data: The user data returned on SAX callbacks
12753 * @ioread: an I/O read function
12754 * @ioclose: an I/O close function
12755 * @ioctx: an I/O handler
12756 * @enc: the charset encoding if known
12757 *
12758 * Create a parser context for using the XML parser with an existing
12759 * I/O stream
12760 *
12761 * Returns the new parser context or NULL
12762 */
12763xmlParserCtxtPtr
12764xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12765 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12766 void *ioctx, xmlCharEncoding enc) {
12767 xmlParserCtxtPtr ctxt;
12768 xmlParserInputPtr inputStream;
12769 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012770
Daniel Veillard42595322004-11-08 10:52:06 +000012771 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012772
12773 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012774 if (buf == NULL) {
12775 if (ioclose != NULL)
12776 ioclose(ioctx);
12777 return (NULL);
12778 }
Owen Taylor3473f882001-02-23 17:55:21 +000012779
12780 ctxt = xmlNewParserCtxt();
12781 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012782 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012783 return(NULL);
12784 }
12785 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012786#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012787 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012788#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012789 xmlFree(ctxt->sax);
12790 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12791 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012792 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012793 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012794 return(NULL);
12795 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012796 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12797 if (sax->initialized == XML_SAX2_MAGIC)
12798 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12799 else
12800 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012801 if (user_data != NULL)
12802 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012803 }
Owen Taylor3473f882001-02-23 17:55:21 +000012804
12805 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12806 if (inputStream == NULL) {
12807 xmlFreeParserCtxt(ctxt);
12808 return(NULL);
12809 }
12810 inputPush(ctxt, inputStream);
12811
12812 return(ctxt);
12813}
12814
Daniel Veillard4432df22003-09-28 18:58:27 +000012815#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012816/************************************************************************
12817 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012818 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012819 * *
12820 ************************************************************************/
12821
12822/**
12823 * xmlIOParseDTD:
12824 * @sax: the SAX handler block or NULL
12825 * @input: an Input Buffer
12826 * @enc: the charset encoding if known
12827 *
12828 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012829 *
Owen Taylor3473f882001-02-23 17:55:21 +000012830 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012831 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012832 */
12833
12834xmlDtdPtr
12835xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12836 xmlCharEncoding enc) {
12837 xmlDtdPtr ret = NULL;
12838 xmlParserCtxtPtr ctxt;
12839 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012840 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012841
12842 if (input == NULL)
12843 return(NULL);
12844
12845 ctxt = xmlNewParserCtxt();
12846 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012847 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012848 return(NULL);
12849 }
12850
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012851 /* We are loading a DTD */
12852 ctxt->options |= XML_PARSE_DTDLOAD;
12853
Owen Taylor3473f882001-02-23 17:55:21 +000012854 /*
12855 * Set-up the SAX context
12856 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012857 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012858 if (ctxt->sax != NULL)
12859 xmlFree(ctxt->sax);
12860 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012861 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012862 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012863 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012864
12865 /*
12866 * generate a parser input from the I/O handler
12867 */
12868
Daniel Veillard43caefb2003-12-07 19:32:22 +000012869 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012870 if (pinput == NULL) {
12871 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012872 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012873 xmlFreeParserCtxt(ctxt);
12874 return(NULL);
12875 }
12876
12877 /*
12878 * plug some encoding conversion routines here.
12879 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012880 if (xmlPushInput(ctxt, pinput) < 0) {
12881 if (sax != NULL) ctxt->sax = NULL;
12882 xmlFreeParserCtxt(ctxt);
12883 return(NULL);
12884 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012885 if (enc != XML_CHAR_ENCODING_NONE) {
12886 xmlSwitchEncoding(ctxt, enc);
12887 }
Owen Taylor3473f882001-02-23 17:55:21 +000012888
12889 pinput->filename = NULL;
12890 pinput->line = 1;
12891 pinput->col = 1;
12892 pinput->base = ctxt->input->cur;
12893 pinput->cur = ctxt->input->cur;
12894 pinput->free = NULL;
12895
12896 /*
12897 * let's parse that entity knowing it's an external subset.
12898 */
12899 ctxt->inSubset = 2;
12900 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012901 if (ctxt->myDoc == NULL) {
12902 xmlErrMemory(ctxt, "New Doc failed");
12903 return(NULL);
12904 }
12905 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012906 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12907 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012908
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012909 if ((enc == XML_CHAR_ENCODING_NONE) &&
12910 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012911 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012912 * Get the 4 first bytes and decode the charset
12913 * if enc != XML_CHAR_ENCODING_NONE
12914 * plug some encoding conversion routines.
12915 */
12916 start[0] = RAW;
12917 start[1] = NXT(1);
12918 start[2] = NXT(2);
12919 start[3] = NXT(3);
12920 enc = xmlDetectCharEncoding(start, 4);
12921 if (enc != XML_CHAR_ENCODING_NONE) {
12922 xmlSwitchEncoding(ctxt, enc);
12923 }
12924 }
12925
Owen Taylor3473f882001-02-23 17:55:21 +000012926 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12927
12928 if (ctxt->myDoc != NULL) {
12929 if (ctxt->wellFormed) {
12930 ret = ctxt->myDoc->extSubset;
12931 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012932 if (ret != NULL) {
12933 xmlNodePtr tmp;
12934
12935 ret->doc = NULL;
12936 tmp = ret->children;
12937 while (tmp != NULL) {
12938 tmp->doc = NULL;
12939 tmp = tmp->next;
12940 }
12941 }
Owen Taylor3473f882001-02-23 17:55:21 +000012942 } else {
12943 ret = NULL;
12944 }
12945 xmlFreeDoc(ctxt->myDoc);
12946 ctxt->myDoc = NULL;
12947 }
12948 if (sax != NULL) ctxt->sax = NULL;
12949 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012950
Owen Taylor3473f882001-02-23 17:55:21 +000012951 return(ret);
12952}
12953
12954/**
12955 * xmlSAXParseDTD:
12956 * @sax: the SAX handler block
12957 * @ExternalID: a NAME* containing the External ID of the DTD
12958 * @SystemID: a NAME* containing the URL to the DTD
12959 *
12960 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012961 *
Owen Taylor3473f882001-02-23 17:55:21 +000012962 * Returns the resulting xmlDtdPtr or NULL in case of error.
12963 */
12964
12965xmlDtdPtr
12966xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12967 const xmlChar *SystemID) {
12968 xmlDtdPtr ret = NULL;
12969 xmlParserCtxtPtr ctxt;
12970 xmlParserInputPtr input = NULL;
12971 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012972 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012973
12974 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12975
12976 ctxt = xmlNewParserCtxt();
12977 if (ctxt == NULL) {
12978 return(NULL);
12979 }
12980
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012981 /* We are loading a DTD */
12982 ctxt->options |= XML_PARSE_DTDLOAD;
12983
Owen Taylor3473f882001-02-23 17:55:21 +000012984 /*
12985 * Set-up the SAX context
12986 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012987 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012988 if (ctxt->sax != NULL)
12989 xmlFree(ctxt->sax);
12990 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012991 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012992 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012993
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012994 /*
12995 * Canonicalise the system ID
12996 */
12997 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012998 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012999 xmlFreeParserCtxt(ctxt);
13000 return(NULL);
13001 }
Owen Taylor3473f882001-02-23 17:55:21 +000013002
13003 /*
13004 * Ask the Entity resolver to load the damn thing
13005 */
13006
13007 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000013008 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
13009 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000013010 if (input == NULL) {
13011 if (sax != NULL) ctxt->sax = NULL;
13012 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000013013 if (systemIdCanonic != NULL)
13014 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000013015 return(NULL);
13016 }
13017
13018 /*
13019 * plug some encoding conversion routines here.
13020 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000013021 if (xmlPushInput(ctxt, input) < 0) {
13022 if (sax != NULL) ctxt->sax = NULL;
13023 xmlFreeParserCtxt(ctxt);
13024 if (systemIdCanonic != NULL)
13025 xmlFree(systemIdCanonic);
13026 return(NULL);
13027 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013028 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13029 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
13030 xmlSwitchEncoding(ctxt, enc);
13031 }
Owen Taylor3473f882001-02-23 17:55:21 +000013032
13033 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000013034 input->filename = (char *) systemIdCanonic;
13035 else
13036 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000013037 input->line = 1;
13038 input->col = 1;
13039 input->base = ctxt->input->cur;
13040 input->cur = ctxt->input->cur;
13041 input->free = NULL;
13042
13043 /*
13044 * let's parse that entity knowing it's an external subset.
13045 */
13046 ctxt->inSubset = 2;
13047 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000013048 if (ctxt->myDoc == NULL) {
13049 xmlErrMemory(ctxt, "New Doc failed");
13050 if (sax != NULL) ctxt->sax = NULL;
13051 xmlFreeParserCtxt(ctxt);
13052 return(NULL);
13053 }
13054 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000013055 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
13056 ExternalID, SystemID);
13057 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13058
13059 if (ctxt->myDoc != NULL) {
13060 if (ctxt->wellFormed) {
13061 ret = ctxt->myDoc->extSubset;
13062 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000013063 if (ret != NULL) {
13064 xmlNodePtr tmp;
13065
13066 ret->doc = NULL;
13067 tmp = ret->children;
13068 while (tmp != NULL) {
13069 tmp->doc = NULL;
13070 tmp = tmp->next;
13071 }
13072 }
Owen Taylor3473f882001-02-23 17:55:21 +000013073 } else {
13074 ret = NULL;
13075 }
13076 xmlFreeDoc(ctxt->myDoc);
13077 ctxt->myDoc = NULL;
13078 }
13079 if (sax != NULL) ctxt->sax = NULL;
13080 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013081
Owen Taylor3473f882001-02-23 17:55:21 +000013082 return(ret);
13083}
13084
Daniel Veillard4432df22003-09-28 18:58:27 +000013085
Owen Taylor3473f882001-02-23 17:55:21 +000013086/**
13087 * xmlParseDTD:
13088 * @ExternalID: a NAME* containing the External ID of the DTD
13089 * @SystemID: a NAME* containing the URL to the DTD
13090 *
13091 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000013092 *
Owen Taylor3473f882001-02-23 17:55:21 +000013093 * Returns the resulting xmlDtdPtr or NULL in case of error.
13094 */
13095
13096xmlDtdPtr
13097xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13098 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13099}
Daniel Veillard4432df22003-09-28 18:58:27 +000013100#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013101
13102/************************************************************************
13103 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013104 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000013105 * *
13106 ************************************************************************/
13107
13108/**
Owen Taylor3473f882001-02-23 17:55:21 +000013109 * xmlParseCtxtExternalEntity:
13110 * @ctx: the existing parsing context
13111 * @URL: the URL for the entity to load
13112 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013113 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013114 *
13115 * Parse an external general entity within an existing parsing context
13116 * An external general parsed entity is well-formed if it matches the
13117 * production labeled extParsedEnt.
13118 *
13119 * [78] extParsedEnt ::= TextDecl? content
13120 *
13121 * Returns 0 if the entity is well formed, -1 in case of args problem and
13122 * the parser error code otherwise
13123 */
13124
13125int
13126xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013127 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000013128 xmlParserCtxtPtr ctxt;
13129 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013130 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013131 xmlSAXHandlerPtr oldsax = NULL;
13132 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013133 xmlChar start[4];
13134 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013135
Daniel Veillardce682bc2004-11-05 17:22:25 +000013136 if (ctx == NULL) return(-1);
13137
Daniel Veillard0161e632008-08-28 15:36:32 +000013138 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13139 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013140 return(XML_ERR_ENTITY_LOOP);
13141 }
13142
Daniel Veillardcda96922001-08-21 10:56:31 +000013143 if (lst != NULL)
13144 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013145 if ((URL == NULL) && (ID == NULL))
13146 return(-1);
13147 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13148 return(-1);
13149
Rob Richards798743a2009-06-19 13:54:25 -040013150 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000013151 if (ctxt == NULL) {
13152 return(-1);
13153 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013154
Owen Taylor3473f882001-02-23 17:55:21 +000013155 oldsax = ctxt->sax;
13156 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013157 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013158 newDoc = xmlNewDoc(BAD_CAST "1.0");
13159 if (newDoc == NULL) {
13160 xmlFreeParserCtxt(ctxt);
13161 return(-1);
13162 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013163 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013164 if (ctx->myDoc->dict) {
13165 newDoc->dict = ctx->myDoc->dict;
13166 xmlDictReference(newDoc->dict);
13167 }
Owen Taylor3473f882001-02-23 17:55:21 +000013168 if (ctx->myDoc != NULL) {
13169 newDoc->intSubset = ctx->myDoc->intSubset;
13170 newDoc->extSubset = ctx->myDoc->extSubset;
13171 }
13172 if (ctx->myDoc->URL != NULL) {
13173 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13174 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013175 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13176 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013177 ctxt->sax = oldsax;
13178 xmlFreeParserCtxt(ctxt);
13179 newDoc->intSubset = NULL;
13180 newDoc->extSubset = NULL;
13181 xmlFreeDoc(newDoc);
13182 return(-1);
13183 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013184 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013185 nodePush(ctxt, newDoc->children);
13186 if (ctx->myDoc == NULL) {
13187 ctxt->myDoc = newDoc;
13188 } else {
13189 ctxt->myDoc = ctx->myDoc;
13190 newDoc->children->doc = ctx->myDoc;
13191 }
13192
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013193 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013194 * Get the 4 first bytes and decode the charset
13195 * if enc != XML_CHAR_ENCODING_NONE
13196 * plug some encoding conversion routines.
13197 */
13198 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013199 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13200 start[0] = RAW;
13201 start[1] = NXT(1);
13202 start[2] = NXT(2);
13203 start[3] = NXT(3);
13204 enc = xmlDetectCharEncoding(start, 4);
13205 if (enc != XML_CHAR_ENCODING_NONE) {
13206 xmlSwitchEncoding(ctxt, enc);
13207 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013208 }
13209
Owen Taylor3473f882001-02-23 17:55:21 +000013210 /*
13211 * Parse a possible text declaration first
13212 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013213 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013214 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013215 /*
13216 * An XML-1.0 document can't reference an entity not XML-1.0
13217 */
13218 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13219 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013220 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013221 "Version mismatch between document and entity\n");
13222 }
Owen Taylor3473f882001-02-23 17:55:21 +000013223 }
13224
13225 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013226 * If the user provided its own SAX callbacks then reuse the
13227 * useData callback field, otherwise the expected setup in a
13228 * DOM builder is to have userData == ctxt
13229 */
13230 if (ctx->userData == ctx)
13231 ctxt->userData = ctxt;
13232 else
13233 ctxt->userData = ctx->userData;
13234
13235 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013236 * Doing validity checking on chunk doesn't make sense
13237 */
13238 ctxt->instate = XML_PARSER_CONTENT;
13239 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013240 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013241 ctxt->loadsubset = ctx->loadsubset;
13242 ctxt->depth = ctx->depth + 1;
13243 ctxt->replaceEntities = ctx->replaceEntities;
13244 if (ctxt->validate) {
13245 ctxt->vctxt.error = ctx->vctxt.error;
13246 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013247 } else {
13248 ctxt->vctxt.error = NULL;
13249 ctxt->vctxt.warning = NULL;
13250 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013251 ctxt->vctxt.nodeTab = NULL;
13252 ctxt->vctxt.nodeNr = 0;
13253 ctxt->vctxt.nodeMax = 0;
13254 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013255 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13256 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013257 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13258 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13259 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013260 ctxt->dictNames = ctx->dictNames;
13261 ctxt->attsDefault = ctx->attsDefault;
13262 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013263 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013264
13265 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013266
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013267 ctx->validate = ctxt->validate;
13268 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013269 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013270 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013271 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013272 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013273 }
13274 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013275 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013276 }
13277
13278 if (!ctxt->wellFormed) {
13279 if (ctxt->errNo == 0)
13280 ret = 1;
13281 else
13282 ret = ctxt->errNo;
13283 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013284 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013285 xmlNodePtr cur;
13286
13287 /*
13288 * Return the newly created nodeset after unlinking it from
13289 * they pseudo parent.
13290 */
13291 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013292 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013293 while (cur != NULL) {
13294 cur->parent = NULL;
13295 cur = cur->next;
13296 }
13297 newDoc->children->children = NULL;
13298 }
13299 ret = 0;
13300 }
13301 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013302 ctxt->dict = NULL;
13303 ctxt->attsDefault = NULL;
13304 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013305 xmlFreeParserCtxt(ctxt);
13306 newDoc->intSubset = NULL;
13307 newDoc->extSubset = NULL;
13308 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013309
Owen Taylor3473f882001-02-23 17:55:21 +000013310 return(ret);
13311}
13312
13313/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013314 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013315 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013316 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013317 * @sax: the SAX handler bloc (possibly NULL)
13318 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13319 * @depth: Used for loop detection, use 0
13320 * @URL: the URL for the entity to load
13321 * @ID: the System ID for the entity to load
13322 * @list: the return value for the set of parsed nodes
13323 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013324 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013325 *
13326 * Returns 0 if the entity is well formed, -1 in case of args problem and
13327 * the parser error code otherwise
13328 */
13329
Daniel Veillard7d515752003-09-26 19:12:37 +000013330static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013331xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13332 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013333 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013334 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013335 xmlParserCtxtPtr ctxt;
13336 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013337 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013338 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013339 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013340 xmlChar start[4];
13341 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013342
Daniel Veillard0161e632008-08-28 15:36:32 +000013343 if (((depth > 40) &&
13344 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13345 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013346 return(XML_ERR_ENTITY_LOOP);
13347 }
13348
Owen Taylor3473f882001-02-23 17:55:21 +000013349 if (list != NULL)
13350 *list = NULL;
13351 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013352 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013353 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013354 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013355
13356
Rob Richards9c0aa472009-03-26 18:10:19 +000013357 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013358 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013359 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013360 if (oldctxt != NULL) {
13361 ctxt->_private = oldctxt->_private;
13362 ctxt->loadsubset = oldctxt->loadsubset;
13363 ctxt->validate = oldctxt->validate;
13364 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013365 ctxt->record_info = oldctxt->record_info;
13366 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13367 ctxt->node_seq.length = oldctxt->node_seq.length;
13368 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013369 } else {
13370 /*
13371 * Doing validity checking on chunk without context
13372 * doesn't make sense
13373 */
13374 ctxt->_private = NULL;
13375 ctxt->validate = 0;
13376 ctxt->external = 2;
13377 ctxt->loadsubset = 0;
13378 }
Owen Taylor3473f882001-02-23 17:55:21 +000013379 if (sax != NULL) {
13380 oldsax = ctxt->sax;
13381 ctxt->sax = sax;
13382 if (user_data != NULL)
13383 ctxt->userData = user_data;
13384 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013385 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013386 newDoc = xmlNewDoc(BAD_CAST "1.0");
13387 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013388 ctxt->node_seq.maximum = 0;
13389 ctxt->node_seq.length = 0;
13390 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013391 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013392 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013393 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013394 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013395 newDoc->intSubset = doc->intSubset;
13396 newDoc->extSubset = doc->extSubset;
13397 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013398 xmlDictReference(newDoc->dict);
13399
Owen Taylor3473f882001-02-23 17:55:21 +000013400 if (doc->URL != NULL) {
13401 newDoc->URL = xmlStrdup(doc->URL);
13402 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013403 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13404 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013405 if (sax != NULL)
13406 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013407 ctxt->node_seq.maximum = 0;
13408 ctxt->node_seq.length = 0;
13409 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013410 xmlFreeParserCtxt(ctxt);
13411 newDoc->intSubset = NULL;
13412 newDoc->extSubset = NULL;
13413 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013414 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013415 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013416 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013417 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013418 ctxt->myDoc = doc;
13419 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013420
Daniel Veillard0161e632008-08-28 15:36:32 +000013421 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013422 * Get the 4 first bytes and decode the charset
13423 * if enc != XML_CHAR_ENCODING_NONE
13424 * plug some encoding conversion routines.
13425 */
13426 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013427 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13428 start[0] = RAW;
13429 start[1] = NXT(1);
13430 start[2] = NXT(2);
13431 start[3] = NXT(3);
13432 enc = xmlDetectCharEncoding(start, 4);
13433 if (enc != XML_CHAR_ENCODING_NONE) {
13434 xmlSwitchEncoding(ctxt, enc);
13435 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013436 }
13437
Owen Taylor3473f882001-02-23 17:55:21 +000013438 /*
13439 * Parse a possible text declaration first
13440 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013441 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013442 xmlParseTextDecl(ctxt);
13443 }
13444
Owen Taylor3473f882001-02-23 17:55:21 +000013445 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013446 ctxt->depth = depth;
13447
13448 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013449
Daniel Veillard561b7f82002-03-20 21:55:57 +000013450 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013451 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013452 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013453 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013454 }
13455 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013456 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013457 }
13458
13459 if (!ctxt->wellFormed) {
13460 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013461 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013462 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013463 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013464 } else {
13465 if (list != NULL) {
13466 xmlNodePtr cur;
13467
13468 /*
13469 * Return the newly created nodeset after unlinking it from
13470 * they pseudo parent.
13471 */
13472 cur = newDoc->children->children;
13473 *list = cur;
13474 while (cur != NULL) {
13475 cur->parent = NULL;
13476 cur = cur->next;
13477 }
13478 newDoc->children->children = NULL;
13479 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013480 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013481 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013482
13483 /*
13484 * Record in the parent context the number of entities replacement
13485 * done when parsing that reference.
13486 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013487 if (oldctxt != NULL)
13488 oldctxt->nbentities += ctxt->nbentities;
13489
Daniel Veillard0161e632008-08-28 15:36:32 +000013490 /*
13491 * Also record the size of the entity parsed
13492 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013493 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013494 oldctxt->sizeentities += ctxt->input->consumed;
13495 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13496 }
13497 /*
13498 * And record the last error if any
13499 */
13500 if (ctxt->lastError.code != XML_ERR_OK)
13501 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13502
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013503 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013504 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013505 if (oldctxt != NULL) {
13506 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13507 oldctxt->node_seq.length = ctxt->node_seq.length;
13508 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13509 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013510 ctxt->node_seq.maximum = 0;
13511 ctxt->node_seq.length = 0;
13512 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013513 xmlFreeParserCtxt(ctxt);
13514 newDoc->intSubset = NULL;
13515 newDoc->extSubset = NULL;
13516 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013517
Owen Taylor3473f882001-02-23 17:55:21 +000013518 return(ret);
13519}
13520
Daniel Veillard81273902003-09-30 00:43:48 +000013521#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013522/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013523 * xmlParseExternalEntity:
13524 * @doc: the document the chunk pertains to
13525 * @sax: the SAX handler bloc (possibly NULL)
13526 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13527 * @depth: Used for loop detection, use 0
13528 * @URL: the URL for the entity to load
13529 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013530 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013531 *
13532 * Parse an external general entity
13533 * An external general parsed entity is well-formed if it matches the
13534 * production labeled extParsedEnt.
13535 *
13536 * [78] extParsedEnt ::= TextDecl? content
13537 *
13538 * Returns 0 if the entity is well formed, -1 in case of args problem and
13539 * the parser error code otherwise
13540 */
13541
13542int
13543xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013544 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013545 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013546 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013547}
13548
13549/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013550 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013551 * @doc: the document the chunk pertains to
13552 * @sax: the SAX handler bloc (possibly NULL)
13553 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13554 * @depth: Used for loop detection, use 0
13555 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013556 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013557 *
13558 * Parse a well-balanced chunk of an XML document
13559 * called by the parser
13560 * The allowed sequence for the Well Balanced Chunk is the one defined by
13561 * the content production in the XML grammar:
13562 *
13563 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13564 *
13565 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13566 * the parser error code otherwise
13567 */
13568
13569int
13570xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013571 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013572 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13573 depth, string, lst, 0 );
13574}
Daniel Veillard81273902003-09-30 00:43:48 +000013575#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013576
13577/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013578 * xmlParseBalancedChunkMemoryInternal:
13579 * @oldctxt: the existing parsing context
13580 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13581 * @user_data: the user data field for the parser context
13582 * @lst: the return value for the set of parsed nodes
13583 *
13584 *
13585 * Parse a well-balanced chunk of an XML document
13586 * called by the parser
13587 * The allowed sequence for the Well Balanced Chunk is the one defined by
13588 * the content production in the XML grammar:
13589 *
13590 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13591 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013592 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13593 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013594 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013595 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013596 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013597 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013598static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013599xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13600 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13601 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013602 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013603 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013604 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013605 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013606 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013607 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013608 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013609#ifdef SAX2
13610 int i;
13611#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013612
Daniel Veillard0161e632008-08-28 15:36:32 +000013613 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13614 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013615 return(XML_ERR_ENTITY_LOOP);
13616 }
13617
13618
13619 if (lst != NULL)
13620 *lst = NULL;
13621 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013622 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013623
13624 size = xmlStrlen(string);
13625
13626 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013627 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013628 if (user_data != NULL)
13629 ctxt->userData = user_data;
13630 else
13631 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013632 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13633 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013634 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13635 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13636 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013637
Daniel Veillard74eaec12009-08-26 15:57:20 +020013638#ifdef SAX2
13639 /* propagate namespaces down the entity */
13640 for (i = 0;i < oldctxt->nsNr;i += 2) {
13641 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13642 }
13643#endif
13644
Daniel Veillard328f48c2002-11-15 15:24:34 +000013645 oldsax = ctxt->sax;
13646 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013647 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013648 ctxt->replaceEntities = oldctxt->replaceEntities;
13649 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013650
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013651 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013652 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013653 newDoc = xmlNewDoc(BAD_CAST "1.0");
13654 if (newDoc == NULL) {
13655 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013656 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013657 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013658 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013659 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013660 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013661 newDoc->dict = ctxt->dict;
13662 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013663 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013664 } else {
13665 ctxt->myDoc = oldctxt->myDoc;
13666 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013667 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013668 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013669 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13670 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013671 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013672 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013673 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013674 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013675 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013676 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013677 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013678 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013679 ctxt->myDoc->children = NULL;
13680 ctxt->myDoc->last = NULL;
13681 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013682 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013683 ctxt->instate = XML_PARSER_CONTENT;
13684 ctxt->depth = oldctxt->depth + 1;
13685
Daniel Veillard328f48c2002-11-15 15:24:34 +000013686 ctxt->validate = 0;
13687 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013688 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13689 /*
13690 * ID/IDREF registration will be done in xmlValidateElement below
13691 */
13692 ctxt->loadsubset |= XML_SKIP_IDS;
13693 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013694 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013695 ctxt->attsDefault = oldctxt->attsDefault;
13696 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013697
Daniel Veillard68e9e742002-11-16 15:35:11 +000013698 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013699 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013700 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013701 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013702 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013703 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013704 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013705 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013706 }
13707
13708 if (!ctxt->wellFormed) {
13709 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013710 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013711 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013712 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013713 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013714 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013715 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013716
William M. Brack7b9154b2003-09-27 19:23:50 +000013717 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013718 xmlNodePtr cur;
13719
13720 /*
13721 * Return the newly created nodeset after unlinking it from
13722 * they pseudo parent.
13723 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013724 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013725 *lst = cur;
13726 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013727#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013728 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13729 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13730 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013731 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13732 oldctxt->myDoc, cur);
13733 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013734#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013735 cur->parent = NULL;
13736 cur = cur->next;
13737 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013738 ctxt->myDoc->children->children = NULL;
13739 }
13740 if (ctxt->myDoc != NULL) {
13741 xmlFreeNode(ctxt->myDoc->children);
13742 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013743 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013744 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013745
13746 /*
13747 * Record in the parent context the number of entities replacement
13748 * done when parsing that reference.
13749 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013750 if (oldctxt != NULL)
13751 oldctxt->nbentities += ctxt->nbentities;
13752
Daniel Veillard0161e632008-08-28 15:36:32 +000013753 /*
13754 * Also record the last error if any
13755 */
13756 if (ctxt->lastError.code != XML_ERR_OK)
13757 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13758
Daniel Veillard328f48c2002-11-15 15:24:34 +000013759 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013760 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013761 ctxt->attsDefault = NULL;
13762 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013763 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013764 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013765 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013766 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013767
Daniel Veillard328f48c2002-11-15 15:24:34 +000013768 return(ret);
13769}
13770
Daniel Veillard29b17482004-08-16 00:39:03 +000013771/**
13772 * xmlParseInNodeContext:
13773 * @node: the context node
13774 * @data: the input string
13775 * @datalen: the input string length in bytes
13776 * @options: a combination of xmlParserOption
13777 * @lst: the return value for the set of parsed nodes
13778 *
13779 * Parse a well-balanced chunk of an XML document
13780 * within the context (DTD, namespaces, etc ...) of the given node.
13781 *
13782 * The allowed sequence for the data is a Well Balanced Chunk defined by
13783 * the content production in the XML grammar:
13784 *
13785 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13786 *
13787 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13788 * error code otherwise
13789 */
13790xmlParserErrors
13791xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13792 int options, xmlNodePtr *lst) {
13793#ifdef SAX2
13794 xmlParserCtxtPtr ctxt;
13795 xmlDocPtr doc = NULL;
13796 xmlNodePtr fake, cur;
13797 int nsnr = 0;
13798
13799 xmlParserErrors ret = XML_ERR_OK;
13800
13801 /*
13802 * check all input parameters, grab the document
13803 */
13804 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13805 return(XML_ERR_INTERNAL_ERROR);
13806 switch (node->type) {
13807 case XML_ELEMENT_NODE:
13808 case XML_ATTRIBUTE_NODE:
13809 case XML_TEXT_NODE:
13810 case XML_CDATA_SECTION_NODE:
13811 case XML_ENTITY_REF_NODE:
13812 case XML_PI_NODE:
13813 case XML_COMMENT_NODE:
13814 case XML_DOCUMENT_NODE:
13815 case XML_HTML_DOCUMENT_NODE:
13816 break;
13817 default:
13818 return(XML_ERR_INTERNAL_ERROR);
13819
13820 }
13821 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13822 (node->type != XML_DOCUMENT_NODE) &&
13823 (node->type != XML_HTML_DOCUMENT_NODE))
13824 node = node->parent;
13825 if (node == NULL)
13826 return(XML_ERR_INTERNAL_ERROR);
13827 if (node->type == XML_ELEMENT_NODE)
13828 doc = node->doc;
13829 else
13830 doc = (xmlDocPtr) node;
13831 if (doc == NULL)
13832 return(XML_ERR_INTERNAL_ERROR);
13833
13834 /*
13835 * allocate a context and set-up everything not related to the
13836 * node position in the tree
13837 */
13838 if (doc->type == XML_DOCUMENT_NODE)
13839 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13840#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013841 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013842 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013843 /*
13844 * When parsing in context, it makes no sense to add implied
13845 * elements like html/body/etc...
13846 */
13847 options |= HTML_PARSE_NOIMPLIED;
13848 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013849#endif
13850 else
13851 return(XML_ERR_INTERNAL_ERROR);
13852
13853 if (ctxt == NULL)
13854 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013855
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013856 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013857 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13858 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13859 * we must wait until the last moment to free the original one.
13860 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013861 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013862 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013863 xmlDictFree(ctxt->dict);
13864 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013865 } else
13866 options |= XML_PARSE_NODICT;
13867
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013868 if (doc->encoding != NULL) {
13869 xmlCharEncodingHandlerPtr hdlr;
13870
13871 if (ctxt->encoding != NULL)
13872 xmlFree((xmlChar *) ctxt->encoding);
13873 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13874
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013875 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013876 if (hdlr != NULL) {
13877 xmlSwitchToEncoding(ctxt, hdlr);
13878 } else {
13879 return(XML_ERR_UNSUPPORTED_ENCODING);
13880 }
13881 }
13882
Daniel Veillard37334572008-07-31 08:20:02 +000013883 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013884 xmlDetectSAX2(ctxt);
13885 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013886 /* parsing in context, i.e. as within existing content */
13887 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013888
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013889 fake = xmlNewComment(NULL);
13890 if (fake == NULL) {
13891 xmlFreeParserCtxt(ctxt);
13892 return(XML_ERR_NO_MEMORY);
13893 }
13894 xmlAddChild(node, fake);
13895
Daniel Veillard29b17482004-08-16 00:39:03 +000013896 if (node->type == XML_ELEMENT_NODE) {
13897 nodePush(ctxt, node);
13898 /*
13899 * initialize the SAX2 namespaces stack
13900 */
13901 cur = node;
13902 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13903 xmlNsPtr ns = cur->nsDef;
13904 const xmlChar *iprefix, *ihref;
13905
13906 while (ns != NULL) {
13907 if (ctxt->dict) {
13908 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13909 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13910 } else {
13911 iprefix = ns->prefix;
13912 ihref = ns->href;
13913 }
13914
13915 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13916 nsPush(ctxt, iprefix, ihref);
13917 nsnr++;
13918 }
13919 ns = ns->next;
13920 }
13921 cur = cur->parent;
13922 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013923 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013924
13925 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13926 /*
13927 * ID/IDREF registration will be done in xmlValidateElement below
13928 */
13929 ctxt->loadsubset |= XML_SKIP_IDS;
13930 }
13931
Daniel Veillard499cc922006-01-18 17:22:35 +000013932#ifdef LIBXML_HTML_ENABLED
13933 if (doc->type == XML_HTML_DOCUMENT_NODE)
13934 __htmlParseContent(ctxt);
13935 else
13936#endif
13937 xmlParseContent(ctxt);
13938
Daniel Veillard29b17482004-08-16 00:39:03 +000013939 nsPop(ctxt, nsnr);
13940 if ((RAW == '<') && (NXT(1) == '/')) {
13941 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13942 } else if (RAW != 0) {
13943 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13944 }
13945 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13946 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13947 ctxt->wellFormed = 0;
13948 }
13949
13950 if (!ctxt->wellFormed) {
13951 if (ctxt->errNo == 0)
13952 ret = XML_ERR_INTERNAL_ERROR;
13953 else
13954 ret = (xmlParserErrors)ctxt->errNo;
13955 } else {
13956 ret = XML_ERR_OK;
13957 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013958
Daniel Veillard29b17482004-08-16 00:39:03 +000013959 /*
13960 * Return the newly created nodeset after unlinking it from
13961 * the pseudo sibling.
13962 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013963
Daniel Veillard29b17482004-08-16 00:39:03 +000013964 cur = fake->next;
13965 fake->next = NULL;
13966 node->last = fake;
13967
13968 if (cur != NULL) {
13969 cur->prev = NULL;
13970 }
13971
13972 *lst = cur;
13973
13974 while (cur != NULL) {
13975 cur->parent = NULL;
13976 cur = cur->next;
13977 }
13978
13979 xmlUnlinkNode(fake);
13980 xmlFreeNode(fake);
13981
13982
13983 if (ret != XML_ERR_OK) {
13984 xmlFreeNodeList(*lst);
13985 *lst = NULL;
13986 }
William M. Brackc3f81342004-10-03 01:22:44 +000013987
William M. Brackb7b54de2004-10-06 16:38:01 +000013988 if (doc->dict != NULL)
13989 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013990 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013991
Daniel Veillard29b17482004-08-16 00:39:03 +000013992 return(ret);
13993#else /* !SAX2 */
13994 return(XML_ERR_INTERNAL_ERROR);
13995#endif
13996}
13997
Daniel Veillard81273902003-09-30 00:43:48 +000013998#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013999/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000014000 * xmlParseBalancedChunkMemoryRecover:
14001 * @doc: the document the chunk pertains to
14002 * @sax: the SAX handler bloc (possibly NULL)
14003 * @user_data: The user data returned on SAX callbacks (possibly NULL)
14004 * @depth: Used for loop detection, use 0
14005 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
14006 * @lst: the return value for the set of parsed nodes
14007 * @recover: return nodes even if the data is broken (use 0)
14008 *
14009 *
14010 * Parse a well-balanced chunk of an XML document
14011 * called by the parser
14012 * The allowed sequence for the Well Balanced Chunk is the one defined by
14013 * the content production in the XML grammar:
14014 *
14015 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
14016 *
14017 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
14018 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000014019 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000014020 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000014021 * the parsed chunk is not well balanced, assuming the parsing succeeded to
14022 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000014023 */
14024int
14025xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000014026 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000014027 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000014028 xmlParserCtxtPtr ctxt;
14029 xmlDocPtr newDoc;
14030 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014031 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000014032 int size;
14033 int ret = 0;
14034
Daniel Veillard0161e632008-08-28 15:36:32 +000014035 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000014036 return(XML_ERR_ENTITY_LOOP);
14037 }
14038
14039
Daniel Veillardcda96922001-08-21 10:56:31 +000014040 if (lst != NULL)
14041 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014042 if (string == NULL)
14043 return(-1);
14044
14045 size = xmlStrlen(string);
14046
14047 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
14048 if (ctxt == NULL) return(-1);
14049 ctxt->userData = ctxt;
14050 if (sax != NULL) {
14051 oldsax = ctxt->sax;
14052 ctxt->sax = sax;
14053 if (user_data != NULL)
14054 ctxt->userData = user_data;
14055 }
14056 newDoc = xmlNewDoc(BAD_CAST "1.0");
14057 if (newDoc == NULL) {
14058 xmlFreeParserCtxt(ctxt);
14059 return(-1);
14060 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000014061 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014062 if ((doc != NULL) && (doc->dict != NULL)) {
14063 xmlDictFree(ctxt->dict);
14064 ctxt->dict = doc->dict;
14065 xmlDictReference(ctxt->dict);
14066 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14067 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14068 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14069 ctxt->dictNames = 1;
14070 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000014071 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014072 }
Owen Taylor3473f882001-02-23 17:55:21 +000014073 if (doc != NULL) {
14074 newDoc->intSubset = doc->intSubset;
14075 newDoc->extSubset = doc->extSubset;
14076 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014077 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14078 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000014079 if (sax != NULL)
14080 ctxt->sax = oldsax;
14081 xmlFreeParserCtxt(ctxt);
14082 newDoc->intSubset = NULL;
14083 newDoc->extSubset = NULL;
14084 xmlFreeDoc(newDoc);
14085 return(-1);
14086 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014087 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14088 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000014089 if (doc == NULL) {
14090 ctxt->myDoc = newDoc;
14091 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000014092 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000014093 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000014094 /* Ensure that doc has XML spec namespace */
14095 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14096 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000014097 }
14098 ctxt->instate = XML_PARSER_CONTENT;
14099 ctxt->depth = depth;
14100
14101 /*
14102 * Doing validity checking on chunk doesn't make sense
14103 */
14104 ctxt->validate = 0;
14105 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014106 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014107
Daniel Veillardb39bc392002-10-26 19:29:51 +000014108 if ( doc != NULL ){
14109 content = doc->children;
14110 doc->children = NULL;
14111 xmlParseContent(ctxt);
14112 doc->children = content;
14113 }
14114 else {
14115 xmlParseContent(ctxt);
14116 }
Owen Taylor3473f882001-02-23 17:55:21 +000014117 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014118 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014119 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014120 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014121 }
14122 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014123 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014124 }
14125
14126 if (!ctxt->wellFormed) {
14127 if (ctxt->errNo == 0)
14128 ret = 1;
14129 else
14130 ret = ctxt->errNo;
14131 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000014132 ret = 0;
14133 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014134
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014135 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14136 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000014137
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014138 /*
14139 * Return the newly created nodeset after unlinking it from
14140 * they pseudo parent.
14141 */
14142 cur = newDoc->children->children;
14143 *lst = cur;
14144 while (cur != NULL) {
14145 xmlSetTreeDoc(cur, doc);
14146 cur->parent = NULL;
14147 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000014148 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014149 newDoc->children->children = NULL;
14150 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014151
14152 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000014153 ctxt->sax = oldsax;
14154 xmlFreeParserCtxt(ctxt);
14155 newDoc->intSubset = NULL;
14156 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000014157 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014158 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000014159
Owen Taylor3473f882001-02-23 17:55:21 +000014160 return(ret);
14161}
14162
14163/**
14164 * xmlSAXParseEntity:
14165 * @sax: the SAX handler block
14166 * @filename: the filename
14167 *
14168 * parse an XML external entity out of context and build a tree.
14169 * It use the given SAX function block to handle the parsing callback.
14170 * If sax is NULL, fallback to the default DOM tree building routines.
14171 *
14172 * [78] extParsedEnt ::= TextDecl? content
14173 *
14174 * This correspond to a "Well Balanced" chunk
14175 *
14176 * Returns the resulting document tree
14177 */
14178
14179xmlDocPtr
14180xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14181 xmlDocPtr ret;
14182 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014183
14184 ctxt = xmlCreateFileParserCtxt(filename);
14185 if (ctxt == NULL) {
14186 return(NULL);
14187 }
14188 if (sax != NULL) {
14189 if (ctxt->sax != NULL)
14190 xmlFree(ctxt->sax);
14191 ctxt->sax = sax;
14192 ctxt->userData = NULL;
14193 }
14194
Owen Taylor3473f882001-02-23 17:55:21 +000014195 xmlParseExtParsedEnt(ctxt);
14196
14197 if (ctxt->wellFormed)
14198 ret = ctxt->myDoc;
14199 else {
14200 ret = NULL;
14201 xmlFreeDoc(ctxt->myDoc);
14202 ctxt->myDoc = NULL;
14203 }
14204 if (sax != NULL)
14205 ctxt->sax = NULL;
14206 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000014207
Owen Taylor3473f882001-02-23 17:55:21 +000014208 return(ret);
14209}
14210
14211/**
14212 * xmlParseEntity:
14213 * @filename: the filename
14214 *
14215 * parse an XML external entity out of context and build a tree.
14216 *
14217 * [78] extParsedEnt ::= TextDecl? content
14218 *
14219 * This correspond to a "Well Balanced" chunk
14220 *
14221 * Returns the resulting document tree
14222 */
14223
14224xmlDocPtr
14225xmlParseEntity(const char *filename) {
14226 return(xmlSAXParseEntity(NULL, filename));
14227}
Daniel Veillard81273902003-09-30 00:43:48 +000014228#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014229
14230/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014231 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014232 * @URL: the entity URL
14233 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014234 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014235 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014236 *
14237 * Create a parser context for an external entity
14238 * Automatic support for ZLIB/Compress compressed document is provided
14239 * by default if found at compile-time.
14240 *
14241 * Returns the new parser context or NULL
14242 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014243static xmlParserCtxtPtr
14244xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14245 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014246 xmlParserCtxtPtr ctxt;
14247 xmlParserInputPtr inputStream;
14248 char *directory = NULL;
14249 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014250
Owen Taylor3473f882001-02-23 17:55:21 +000014251 ctxt = xmlNewParserCtxt();
14252 if (ctxt == NULL) {
14253 return(NULL);
14254 }
14255
Daniel Veillard48247b42009-07-10 16:12:46 +020014256 if (pctx != NULL) {
14257 ctxt->options = pctx->options;
14258 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014259 }
14260
Owen Taylor3473f882001-02-23 17:55:21 +000014261 uri = xmlBuildURI(URL, base);
14262
14263 if (uri == NULL) {
14264 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14265 if (inputStream == NULL) {
14266 xmlFreeParserCtxt(ctxt);
14267 return(NULL);
14268 }
14269
14270 inputPush(ctxt, inputStream);
14271
14272 if ((ctxt->directory == NULL) && (directory == NULL))
14273 directory = xmlParserGetDirectory((char *)URL);
14274 if ((ctxt->directory == NULL) && (directory != NULL))
14275 ctxt->directory = directory;
14276 } else {
14277 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14278 if (inputStream == NULL) {
14279 xmlFree(uri);
14280 xmlFreeParserCtxt(ctxt);
14281 return(NULL);
14282 }
14283
14284 inputPush(ctxt, inputStream);
14285
14286 if ((ctxt->directory == NULL) && (directory == NULL))
14287 directory = xmlParserGetDirectory((char *)uri);
14288 if ((ctxt->directory == NULL) && (directory != NULL))
14289 ctxt->directory = directory;
14290 xmlFree(uri);
14291 }
Owen Taylor3473f882001-02-23 17:55:21 +000014292 return(ctxt);
14293}
14294
Rob Richards9c0aa472009-03-26 18:10:19 +000014295/**
14296 * xmlCreateEntityParserCtxt:
14297 * @URL: the entity URL
14298 * @ID: the entity PUBLIC ID
14299 * @base: a possible base for the target URI
14300 *
14301 * Create a parser context for an external entity
14302 * Automatic support for ZLIB/Compress compressed document is provided
14303 * by default if found at compile-time.
14304 *
14305 * Returns the new parser context or NULL
14306 */
14307xmlParserCtxtPtr
14308xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14309 const xmlChar *base) {
14310 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14311
14312}
14313
Owen Taylor3473f882001-02-23 17:55:21 +000014314/************************************************************************
14315 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014316 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014317 * *
14318 ************************************************************************/
14319
14320/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014321 * xmlCreateURLParserCtxt:
14322 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014323 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014324 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014325 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014326 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014327 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014328 *
14329 * Returns the new parser context or NULL
14330 */
14331xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014332xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014333{
14334 xmlParserCtxtPtr ctxt;
14335 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014336 char *directory = NULL;
14337
Owen Taylor3473f882001-02-23 17:55:21 +000014338 ctxt = xmlNewParserCtxt();
14339 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014340 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014341 return(NULL);
14342 }
14343
Daniel Veillarddf292f72005-01-16 19:00:15 +000014344 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014345 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014346 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014347
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014348 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014349 if (inputStream == NULL) {
14350 xmlFreeParserCtxt(ctxt);
14351 return(NULL);
14352 }
14353
Owen Taylor3473f882001-02-23 17:55:21 +000014354 inputPush(ctxt, inputStream);
14355 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014356 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014357 if ((ctxt->directory == NULL) && (directory != NULL))
14358 ctxt->directory = directory;
14359
14360 return(ctxt);
14361}
14362
Daniel Veillard61b93382003-11-03 14:28:31 +000014363/**
14364 * xmlCreateFileParserCtxt:
14365 * @filename: the filename
14366 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014367 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014368 * Automatic support for ZLIB/Compress compressed document is provided
14369 * by default if found at compile-time.
14370 *
14371 * Returns the new parser context or NULL
14372 */
14373xmlParserCtxtPtr
14374xmlCreateFileParserCtxt(const char *filename)
14375{
14376 return(xmlCreateURLParserCtxt(filename, 0));
14377}
14378
Daniel Veillard81273902003-09-30 00:43:48 +000014379#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014380/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014381 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014382 * @sax: the SAX handler block
14383 * @filename: the filename
14384 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14385 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014386 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014387 *
14388 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14389 * compressed document is provided by default if found at compile-time.
14390 * It use the given SAX function block to handle the parsing callback.
14391 * If sax is NULL, fallback to the default DOM tree building routines.
14392 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014393 * User data (void *) is stored within the parser context in the
14394 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014395 *
Owen Taylor3473f882001-02-23 17:55:21 +000014396 * Returns the resulting document tree
14397 */
14398
14399xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014400xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14401 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014402 xmlDocPtr ret;
14403 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014404
Daniel Veillard635ef722001-10-29 11:48:19 +000014405 xmlInitParser();
14406
Owen Taylor3473f882001-02-23 17:55:21 +000014407 ctxt = xmlCreateFileParserCtxt(filename);
14408 if (ctxt == NULL) {
14409 return(NULL);
14410 }
14411 if (sax != NULL) {
14412 if (ctxt->sax != NULL)
14413 xmlFree(ctxt->sax);
14414 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014415 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014416 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014417 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014418 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014419 }
Owen Taylor3473f882001-02-23 17:55:21 +000014420
Daniel Veillard37d2d162008-03-14 10:54:00 +000014421 if (ctxt->directory == NULL)
14422 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014423
Daniel Veillarddad3f682002-11-17 16:47:27 +000014424 ctxt->recovery = recovery;
14425
Owen Taylor3473f882001-02-23 17:55:21 +000014426 xmlParseDocument(ctxt);
14427
William M. Brackc07329e2003-09-08 01:57:30 +000014428 if ((ctxt->wellFormed) || recovery) {
14429 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014430 if (ret != NULL) {
14431 if (ctxt->input->buf->compressed > 0)
14432 ret->compression = 9;
14433 else
14434 ret->compression = ctxt->input->buf->compressed;
14435 }
William M. Brackc07329e2003-09-08 01:57:30 +000014436 }
Owen Taylor3473f882001-02-23 17:55:21 +000014437 else {
14438 ret = NULL;
14439 xmlFreeDoc(ctxt->myDoc);
14440 ctxt->myDoc = NULL;
14441 }
14442 if (sax != NULL)
14443 ctxt->sax = NULL;
14444 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014445
Owen Taylor3473f882001-02-23 17:55:21 +000014446 return(ret);
14447}
14448
14449/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014450 * xmlSAXParseFile:
14451 * @sax: the SAX handler block
14452 * @filename: the filename
14453 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14454 * documents
14455 *
14456 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14457 * compressed document is provided by default if found at compile-time.
14458 * It use the given SAX function block to handle the parsing callback.
14459 * If sax is NULL, fallback to the default DOM tree building routines.
14460 *
14461 * Returns the resulting document tree
14462 */
14463
14464xmlDocPtr
14465xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14466 int recovery) {
14467 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14468}
14469
14470/**
Owen Taylor3473f882001-02-23 17:55:21 +000014471 * xmlRecoverDoc:
14472 * @cur: a pointer to an array of xmlChar
14473 *
14474 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014475 * In the case the document is not Well Formed, a attempt to build a
14476 * tree is tried anyway
14477 *
14478 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014479 */
14480
14481xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014482xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014483 return(xmlSAXParseDoc(NULL, cur, 1));
14484}
14485
14486/**
14487 * xmlParseFile:
14488 * @filename: the filename
14489 *
14490 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14491 * compressed document is provided by default if found at compile-time.
14492 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014493 * Returns the resulting document tree if the file was wellformed,
14494 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014495 */
14496
14497xmlDocPtr
14498xmlParseFile(const char *filename) {
14499 return(xmlSAXParseFile(NULL, filename, 0));
14500}
14501
14502/**
14503 * xmlRecoverFile:
14504 * @filename: the filename
14505 *
14506 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14507 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014508 * In the case the document is not Well Formed, it attempts to build
14509 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014510 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014511 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014512 */
14513
14514xmlDocPtr
14515xmlRecoverFile(const char *filename) {
14516 return(xmlSAXParseFile(NULL, filename, 1));
14517}
14518
14519
14520/**
14521 * xmlSetupParserForBuffer:
14522 * @ctxt: an XML parser context
14523 * @buffer: a xmlChar * buffer
14524 * @filename: a file name
14525 *
14526 * Setup the parser context to parse a new buffer; Clears any prior
14527 * contents from the parser context. The buffer parameter must not be
14528 * NULL, but the filename parameter can be
14529 */
14530void
14531xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14532 const char* filename)
14533{
14534 xmlParserInputPtr input;
14535
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014536 if ((ctxt == NULL) || (buffer == NULL))
14537 return;
14538
Owen Taylor3473f882001-02-23 17:55:21 +000014539 input = xmlNewInputStream(ctxt);
14540 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014541 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014542 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014543 return;
14544 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014545
Owen Taylor3473f882001-02-23 17:55:21 +000014546 xmlClearParserCtxt(ctxt);
14547 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014548 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014549 input->base = buffer;
14550 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014551 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014552 inputPush(ctxt, input);
14553}
14554
14555/**
14556 * xmlSAXUserParseFile:
14557 * @sax: a SAX handler
14558 * @user_data: The user data returned on SAX callbacks
14559 * @filename: a file name
14560 *
14561 * parse an XML file and call the given SAX handler routines.
14562 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014563 *
Owen Taylor3473f882001-02-23 17:55:21 +000014564 * Returns 0 in case of success or a error number otherwise
14565 */
14566int
14567xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14568 const char *filename) {
14569 int ret = 0;
14570 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014571
Owen Taylor3473f882001-02-23 17:55:21 +000014572 ctxt = xmlCreateFileParserCtxt(filename);
14573 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014574 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014575 xmlFree(ctxt->sax);
14576 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014577 xmlDetectSAX2(ctxt);
14578
Owen Taylor3473f882001-02-23 17:55:21 +000014579 if (user_data != NULL)
14580 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014581
Owen Taylor3473f882001-02-23 17:55:21 +000014582 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014583
Owen Taylor3473f882001-02-23 17:55:21 +000014584 if (ctxt->wellFormed)
14585 ret = 0;
14586 else {
14587 if (ctxt->errNo != 0)
14588 ret = ctxt->errNo;
14589 else
14590 ret = -1;
14591 }
14592 if (sax != NULL)
14593 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014594 if (ctxt->myDoc != NULL) {
14595 xmlFreeDoc(ctxt->myDoc);
14596 ctxt->myDoc = NULL;
14597 }
Owen Taylor3473f882001-02-23 17:55:21 +000014598 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014599
Owen Taylor3473f882001-02-23 17:55:21 +000014600 return ret;
14601}
Daniel Veillard81273902003-09-30 00:43:48 +000014602#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014603
14604/************************************************************************
14605 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014606 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014607 * *
14608 ************************************************************************/
14609
14610/**
14611 * xmlCreateMemoryParserCtxt:
14612 * @buffer: a pointer to a char array
14613 * @size: the size of the array
14614 *
14615 * Create a parser context for an XML in-memory document.
14616 *
14617 * Returns the new parser context or NULL
14618 */
14619xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014620xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014621 xmlParserCtxtPtr ctxt;
14622 xmlParserInputPtr input;
14623 xmlParserInputBufferPtr buf;
14624
14625 if (buffer == NULL)
14626 return(NULL);
14627 if (size <= 0)
14628 return(NULL);
14629
14630 ctxt = xmlNewParserCtxt();
14631 if (ctxt == NULL)
14632 return(NULL);
14633
Daniel Veillard53350552003-09-18 13:35:51 +000014634 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014635 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014636 if (buf == NULL) {
14637 xmlFreeParserCtxt(ctxt);
14638 return(NULL);
14639 }
Owen Taylor3473f882001-02-23 17:55:21 +000014640
14641 input = xmlNewInputStream(ctxt);
14642 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014643 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014644 xmlFreeParserCtxt(ctxt);
14645 return(NULL);
14646 }
14647
14648 input->filename = NULL;
14649 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014650 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014651
14652 inputPush(ctxt, input);
14653 return(ctxt);
14654}
14655
Daniel Veillard81273902003-09-30 00:43:48 +000014656#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014657/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014658 * xmlSAXParseMemoryWithData:
14659 * @sax: the SAX handler block
14660 * @buffer: an pointer to a char array
14661 * @size: the size of the array
14662 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14663 * documents
14664 * @data: the userdata
14665 *
14666 * parse an XML in-memory block and use the given SAX function block
14667 * to handle the parsing callback. If sax is NULL, fallback to the default
14668 * DOM tree building routines.
14669 *
14670 * User data (void *) is stored within the parser context in the
14671 * context's _private member, so it is available nearly everywhere in libxml
14672 *
14673 * Returns the resulting document tree
14674 */
14675
14676xmlDocPtr
14677xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14678 int size, int recovery, void *data) {
14679 xmlDocPtr ret;
14680 xmlParserCtxtPtr ctxt;
14681
Daniel Veillardab2a7632009-07-09 08:45:03 +020014682 xmlInitParser();
14683
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014684 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14685 if (ctxt == NULL) return(NULL);
14686 if (sax != NULL) {
14687 if (ctxt->sax != NULL)
14688 xmlFree(ctxt->sax);
14689 ctxt->sax = sax;
14690 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014691 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014692 if (data!=NULL) {
14693 ctxt->_private=data;
14694 }
14695
Daniel Veillardadba5f12003-04-04 16:09:01 +000014696 ctxt->recovery = recovery;
14697
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014698 xmlParseDocument(ctxt);
14699
14700 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14701 else {
14702 ret = NULL;
14703 xmlFreeDoc(ctxt->myDoc);
14704 ctxt->myDoc = NULL;
14705 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014706 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014707 ctxt->sax = NULL;
14708 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014709
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014710 return(ret);
14711}
14712
14713/**
Owen Taylor3473f882001-02-23 17:55:21 +000014714 * xmlSAXParseMemory:
14715 * @sax: the SAX handler block
14716 * @buffer: an pointer to a char array
14717 * @size: the size of the array
14718 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14719 * documents
14720 *
14721 * parse an XML in-memory block and use the given SAX function block
14722 * to handle the parsing callback. If sax is NULL, fallback to the default
14723 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014724 *
Owen Taylor3473f882001-02-23 17:55:21 +000014725 * Returns the resulting document tree
14726 */
14727xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014728xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14729 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014730 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014731}
14732
14733/**
14734 * xmlParseMemory:
14735 * @buffer: an pointer to a char array
14736 * @size: the size of the array
14737 *
14738 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014739 *
Owen Taylor3473f882001-02-23 17:55:21 +000014740 * Returns the resulting document tree
14741 */
14742
Daniel Veillard50822cb2001-07-26 20:05:51 +000014743xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014744 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14745}
14746
14747/**
14748 * xmlRecoverMemory:
14749 * @buffer: an pointer to a char array
14750 * @size: the size of the array
14751 *
14752 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014753 * In the case the document is not Well Formed, an attempt to
14754 * build a tree is tried anyway
14755 *
14756 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014757 */
14758
Daniel Veillard50822cb2001-07-26 20:05:51 +000014759xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014760 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14761}
14762
14763/**
14764 * xmlSAXUserParseMemory:
14765 * @sax: a SAX handler
14766 * @user_data: The user data returned on SAX callbacks
14767 * @buffer: an in-memory XML document input
14768 * @size: the length of the XML document in bytes
14769 *
14770 * A better SAX parsing routine.
14771 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014772 *
Owen Taylor3473f882001-02-23 17:55:21 +000014773 * Returns 0 in case of success or a error number otherwise
14774 */
14775int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014776 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014777 int ret = 0;
14778 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014779
14780 xmlInitParser();
14781
Owen Taylor3473f882001-02-23 17:55:21 +000014782 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14783 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014784 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14785 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014786 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014787 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014788
Daniel Veillard30211a02001-04-26 09:33:18 +000014789 if (user_data != NULL)
14790 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014791
Owen Taylor3473f882001-02-23 17:55:21 +000014792 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014793
Owen Taylor3473f882001-02-23 17:55:21 +000014794 if (ctxt->wellFormed)
14795 ret = 0;
14796 else {
14797 if (ctxt->errNo != 0)
14798 ret = ctxt->errNo;
14799 else
14800 ret = -1;
14801 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014802 if (sax != NULL)
14803 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014804 if (ctxt->myDoc != NULL) {
14805 xmlFreeDoc(ctxt->myDoc);
14806 ctxt->myDoc = NULL;
14807 }
Owen Taylor3473f882001-02-23 17:55:21 +000014808 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014809
Owen Taylor3473f882001-02-23 17:55:21 +000014810 return ret;
14811}
Daniel Veillard81273902003-09-30 00:43:48 +000014812#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014813
14814/**
14815 * xmlCreateDocParserCtxt:
14816 * @cur: a pointer to an array of xmlChar
14817 *
14818 * Creates a parser context for an XML in-memory document.
14819 *
14820 * Returns the new parser context or NULL
14821 */
14822xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014823xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014824 int len;
14825
14826 if (cur == NULL)
14827 return(NULL);
14828 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014829 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014830}
14831
Daniel Veillard81273902003-09-30 00:43:48 +000014832#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014833/**
14834 * xmlSAXParseDoc:
14835 * @sax: the SAX handler block
14836 * @cur: a pointer to an array of xmlChar
14837 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14838 * documents
14839 *
14840 * parse an XML in-memory document and build a tree.
14841 * It use the given SAX function block to handle the parsing callback.
14842 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014843 *
Owen Taylor3473f882001-02-23 17:55:21 +000014844 * Returns the resulting document tree
14845 */
14846
14847xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014848xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014849 xmlDocPtr ret;
14850 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014851 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014852
Daniel Veillard38936062004-11-04 17:45:11 +000014853 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014854
14855
14856 ctxt = xmlCreateDocParserCtxt(cur);
14857 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014858 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014859 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014860 ctxt->sax = sax;
14861 ctxt->userData = NULL;
14862 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014863 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014864
14865 xmlParseDocument(ctxt);
14866 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14867 else {
14868 ret = NULL;
14869 xmlFreeDoc(ctxt->myDoc);
14870 ctxt->myDoc = NULL;
14871 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014872 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014873 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014874 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014875
Owen Taylor3473f882001-02-23 17:55:21 +000014876 return(ret);
14877}
14878
14879/**
14880 * xmlParseDoc:
14881 * @cur: a pointer to an array of xmlChar
14882 *
14883 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014884 *
Owen Taylor3473f882001-02-23 17:55:21 +000014885 * Returns the resulting document tree
14886 */
14887
14888xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014889xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014890 return(xmlSAXParseDoc(NULL, cur, 0));
14891}
Daniel Veillard81273902003-09-30 00:43:48 +000014892#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014893
Daniel Veillard81273902003-09-30 00:43:48 +000014894#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014895/************************************************************************
14896 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014897 * Specific function to keep track of entities references *
14898 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014899 * *
14900 ************************************************************************/
14901
14902static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14903
14904/**
14905 * xmlAddEntityReference:
14906 * @ent : A valid entity
14907 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014908 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014909 *
14910 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14911 */
14912static void
14913xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14914 xmlNodePtr lastNode)
14915{
14916 if (xmlEntityRefFunc != NULL) {
14917 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14918 }
14919}
14920
14921
14922/**
14923 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014924 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014925 *
14926 * Set the function to call call back when a xml reference has been made
14927 */
14928void
14929xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14930{
14931 xmlEntityRefFunc = func;
14932}
Daniel Veillard81273902003-09-30 00:43:48 +000014933#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014934
14935/************************************************************************
14936 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014937 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014938 * *
14939 ************************************************************************/
14940
14941#ifdef LIBXML_XPATH_ENABLED
14942#include <libxml/xpath.h>
14943#endif
14944
Daniel Veillardffa3c742005-07-21 13:24:09 +000014945extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014946static int xmlParserInitialized = 0;
14947
14948/**
14949 * xmlInitParser:
14950 *
14951 * Initialization function for the XML parser.
14952 * This is not reentrant. Call once before processing in case of
14953 * use in multithreaded programs.
14954 */
14955
14956void
14957xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014958 if (xmlParserInitialized != 0)
14959 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014960
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014961#ifdef LIBXML_THREAD_ENABLED
14962 __xmlGlobalInitMutexLock();
14963 if (xmlParserInitialized == 0) {
14964#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014965 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014966 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014967 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14968 (xmlGenericError == NULL))
14969 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014970 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014971 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014972 xmlInitCharEncodingHandlers();
14973 xmlDefaultSAXHandlerInit();
14974 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014975#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014976 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014977#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014978#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014979 htmlInitAutoClose();
14980 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014981#endif
14982#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014983 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014984#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014985 xmlParserInitialized = 1;
14986#ifdef LIBXML_THREAD_ENABLED
14987 }
14988 __xmlGlobalInitMutexUnlock();
14989#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014990}
14991
14992/**
14993 * xmlCleanupParser:
14994 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014995 * This function name is somewhat misleading. It does not clean up
14996 * parser state, it cleans up memory allocated by the library itself.
14997 * It is a cleanup function for the XML library. It tries to reclaim all
14998 * related global memory allocated for the library processing.
14999 * It doesn't deallocate any document related memory. One should
15000 * call xmlCleanupParser() only when the process has finished using
15001 * the library and all XML/HTML documents built with it.
15002 * See also xmlInitParser() which has the opposite function of preparing
15003 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000015004 *
15005 * WARNING: if your application is multithreaded or has plugin support
15006 * calling this may crash the application if another thread or
15007 * a plugin is still using libxml2. It's sometimes very hard to
15008 * guess if libxml2 is in use in the application, some libraries
15009 * or plugins may use it without notice. In case of doubt abstain
15010 * from calling this function or do it just before calling exit()
15011 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000015012 */
15013
15014void
15015xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000015016 if (!xmlParserInitialized)
15017 return;
15018
Owen Taylor3473f882001-02-23 17:55:21 +000015019 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000015020#ifdef LIBXML_CATALOG_ENABLED
15021 xmlCatalogCleanup();
15022#endif
Daniel Veillard14412512005-01-21 23:53:26 +000015023 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000015024 xmlCleanupInputCallbacks();
15025#ifdef LIBXML_OUTPUT_ENABLED
15026 xmlCleanupOutputCallbacks();
15027#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015028#ifdef LIBXML_SCHEMAS_ENABLED
15029 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000015030 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015031#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000015032 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080015033 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000015034 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000015035 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000015036 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000015037}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015038
15039/************************************************************************
15040 * *
15041 * New set (2.6.0) of simpler and more flexible APIs *
15042 * *
15043 ************************************************************************/
15044
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as one statement and the caller supplies the terminating semicolon;
 * this keeps it safe inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
15056
15057/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015058 * xmlCtxtReset:
15059 * @ctxt: an XML parser context
15060 *
15061 * Reset a parser context
15062 */
15063void
15064xmlCtxtReset(xmlParserCtxtPtr ctxt)
15065{
15066 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015067 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015068
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015069 if (ctxt == NULL)
15070 return;
15071
15072 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015073
15074 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15075 xmlFreeInputStream(input);
15076 }
15077 ctxt->inputNr = 0;
15078 ctxt->input = NULL;
15079
15080 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000015081 if (ctxt->spaceTab != NULL) {
15082 ctxt->spaceTab[0] = -1;
15083 ctxt->space = &ctxt->spaceTab[0];
15084 } else {
15085 ctxt->space = NULL;
15086 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015087
15088
15089 ctxt->nodeNr = 0;
15090 ctxt->node = NULL;
15091
15092 ctxt->nameNr = 0;
15093 ctxt->name = NULL;
15094
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015095 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015096 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015097 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015098 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015099 DICT_FREE(ctxt->directory);
15100 ctxt->directory = NULL;
15101 DICT_FREE(ctxt->extSubURI);
15102 ctxt->extSubURI = NULL;
15103 DICT_FREE(ctxt->extSubSystem);
15104 ctxt->extSubSystem = NULL;
15105 if (ctxt->myDoc != NULL)
15106 xmlFreeDoc(ctxt->myDoc);
15107 ctxt->myDoc = NULL;
15108
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015109 ctxt->standalone = -1;
15110 ctxt->hasExternalSubset = 0;
15111 ctxt->hasPErefs = 0;
15112 ctxt->html = 0;
15113 ctxt->external = 0;
15114 ctxt->instate = XML_PARSER_START;
15115 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015116
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015117 ctxt->wellFormed = 1;
15118 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000015119 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015120 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015121#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015122 ctxt->vctxt.userData = ctxt;
15123 ctxt->vctxt.error = xmlParserValidityError;
15124 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015125#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015126 ctxt->record_info = 0;
15127 ctxt->nbChars = 0;
15128 ctxt->checkIndex = 0;
15129 ctxt->inSubset = 0;
15130 ctxt->errNo = XML_ERR_OK;
15131 ctxt->depth = 0;
15132 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15133 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000015134 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000015135 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080015136 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015137 xmlInitNodeInfoSeq(&ctxt->node_seq);
15138
15139 if (ctxt->attsDefault != NULL) {
15140 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15141 ctxt->attsDefault = NULL;
15142 }
15143 if (ctxt->attsSpecial != NULL) {
15144 xmlHashFree(ctxt->attsSpecial, NULL);
15145 ctxt->attsSpecial = NULL;
15146 }
15147
Daniel Veillard4432df22003-09-28 18:58:27 +000015148#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015149 if (ctxt->catalogs != NULL)
15150 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000015151#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000015152 if (ctxt->lastError.code != XML_ERR_OK)
15153 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015154}
15155
15156/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015157 * xmlCtxtResetPush:
15158 * @ctxt: an XML parser context
15159 * @chunk: a pointer to an array of chars
15160 * @size: number of chars in the array
15161 * @filename: an optional file name or URI
15162 * @encoding: the document encoding, or NULL
15163 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015164 * Reset a push parser context
15165 *
15166 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015167 */
15168int
15169xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15170 int size, const char *filename, const char *encoding)
15171{
15172 xmlParserInputPtr inputStream;
15173 xmlParserInputBufferPtr buf;
15174 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15175
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015176 if (ctxt == NULL)
15177 return(1);
15178
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015179 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15180 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15181
15182 buf = xmlAllocParserInputBuffer(enc);
15183 if (buf == NULL)
15184 return(1);
15185
15186 if (ctxt == NULL) {
15187 xmlFreeParserInputBuffer(buf);
15188 return(1);
15189 }
15190
15191 xmlCtxtReset(ctxt);
15192
15193 if (ctxt->pushTab == NULL) {
15194 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15195 sizeof(xmlChar *));
15196 if (ctxt->pushTab == NULL) {
15197 xmlErrMemory(ctxt, NULL);
15198 xmlFreeParserInputBuffer(buf);
15199 return(1);
15200 }
15201 }
15202
15203 if (filename == NULL) {
15204 ctxt->directory = NULL;
15205 } else {
15206 ctxt->directory = xmlParserGetDirectory(filename);
15207 }
15208
15209 inputStream = xmlNewInputStream(ctxt);
15210 if (inputStream == NULL) {
15211 xmlFreeParserInputBuffer(buf);
15212 return(1);
15213 }
15214
15215 if (filename == NULL)
15216 inputStream->filename = NULL;
15217 else
15218 inputStream->filename = (char *)
15219 xmlCanonicPath((const xmlChar *) filename);
15220 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080015221 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015222
15223 inputPush(ctxt, inputStream);
15224
15225 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15226 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015227 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15228 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015229
15230 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15231
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015232 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015233#ifdef DEBUG_PUSH
15234 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15235#endif
15236 }
15237
15238 if (encoding != NULL) {
15239 xmlCharEncodingHandlerPtr hdlr;
15240
Daniel Veillard37334572008-07-31 08:20:02 +000015241 if (ctxt->encoding != NULL)
15242 xmlFree((xmlChar *) ctxt->encoding);
15243 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15244
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015245 hdlr = xmlFindCharEncodingHandler(encoding);
15246 if (hdlr != NULL) {
15247 xmlSwitchToEncoding(ctxt, hdlr);
15248 } else {
15249 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15250 "Unsupported encoding %s\n", BAD_CAST encoding);
15251 }
15252 } else if (enc != XML_CHAR_ENCODING_NONE) {
15253 xmlSwitchEncoding(ctxt, enc);
15254 }
15255
15256 return(0);
15257}
15258
Daniel Veillard37334572008-07-31 08:20:02 +000015259
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015260/**
Daniel Veillard37334572008-07-31 08:20:02 +000015261 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015262 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015263 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015264 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015265 *
15266 * Applies the options to the parser context
15267 *
15268 * Returns 0 in case of success, the set of unknown or unimplemented options
15269 * in case of error.
15270 */
Daniel Veillard37334572008-07-31 08:20:02 +000015271static int
15272xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015273{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015274 if (ctxt == NULL)
15275 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015276 if (encoding != NULL) {
15277 if (ctxt->encoding != NULL)
15278 xmlFree((xmlChar *) ctxt->encoding);
15279 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15280 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015281 if (options & XML_PARSE_RECOVER) {
15282 ctxt->recovery = 1;
15283 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015284 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015285 } else
15286 ctxt->recovery = 0;
15287 if (options & XML_PARSE_DTDLOAD) {
15288 ctxt->loadsubset = XML_DETECT_IDS;
15289 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015290 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291 } else
15292 ctxt->loadsubset = 0;
15293 if (options & XML_PARSE_DTDATTR) {
15294 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15295 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015296 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015297 }
15298 if (options & XML_PARSE_NOENT) {
15299 ctxt->replaceEntities = 1;
15300 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15301 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015302 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015303 } else
15304 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015305 if (options & XML_PARSE_PEDANTIC) {
15306 ctxt->pedantic = 1;
15307 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015308 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015309 } else
15310 ctxt->pedantic = 0;
15311 if (options & XML_PARSE_NOBLANKS) {
15312 ctxt->keepBlanks = 0;
15313 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15314 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015315 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015316 } else
15317 ctxt->keepBlanks = 1;
15318 if (options & XML_PARSE_DTDVALID) {
15319 ctxt->validate = 1;
15320 if (options & XML_PARSE_NOWARNING)
15321 ctxt->vctxt.warning = NULL;
15322 if (options & XML_PARSE_NOERROR)
15323 ctxt->vctxt.error = NULL;
15324 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015325 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015326 } else
15327 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015328 if (options & XML_PARSE_NOWARNING) {
15329 ctxt->sax->warning = NULL;
15330 options -= XML_PARSE_NOWARNING;
15331 }
15332 if (options & XML_PARSE_NOERROR) {
15333 ctxt->sax->error = NULL;
15334 ctxt->sax->fatalError = NULL;
15335 options -= XML_PARSE_NOERROR;
15336 }
Daniel Veillard81273902003-09-30 00:43:48 +000015337#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015338 if (options & XML_PARSE_SAX1) {
15339 ctxt->sax->startElement = xmlSAX2StartElement;
15340 ctxt->sax->endElement = xmlSAX2EndElement;
15341 ctxt->sax->startElementNs = NULL;
15342 ctxt->sax->endElementNs = NULL;
15343 ctxt->sax->initialized = 1;
15344 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015345 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015346 }
Daniel Veillard81273902003-09-30 00:43:48 +000015347#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015348 if (options & XML_PARSE_NODICT) {
15349 ctxt->dictNames = 0;
15350 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015351 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015352 } else {
15353 ctxt->dictNames = 1;
15354 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015355 if (options & XML_PARSE_NOCDATA) {
15356 ctxt->sax->cdataBlock = NULL;
15357 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015358 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015359 }
15360 if (options & XML_PARSE_NSCLEAN) {
15361 ctxt->options |= XML_PARSE_NSCLEAN;
15362 options -= XML_PARSE_NSCLEAN;
15363 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015364 if (options & XML_PARSE_NONET) {
15365 ctxt->options |= XML_PARSE_NONET;
15366 options -= XML_PARSE_NONET;
15367 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015368 if (options & XML_PARSE_COMPACT) {
15369 ctxt->options |= XML_PARSE_COMPACT;
15370 options -= XML_PARSE_COMPACT;
15371 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015372 if (options & XML_PARSE_OLD10) {
15373 ctxt->options |= XML_PARSE_OLD10;
15374 options -= XML_PARSE_OLD10;
15375 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015376 if (options & XML_PARSE_NOBASEFIX) {
15377 ctxt->options |= XML_PARSE_NOBASEFIX;
15378 options -= XML_PARSE_NOBASEFIX;
15379 }
15380 if (options & XML_PARSE_HUGE) {
15381 ctxt->options |= XML_PARSE_HUGE;
15382 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015383 if (ctxt->dict != NULL)
15384 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015385 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015386 if (options & XML_PARSE_OLDSAX) {
15387 ctxt->options |= XML_PARSE_OLDSAX;
15388 options -= XML_PARSE_OLDSAX;
15389 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015390 if (options & XML_PARSE_IGNORE_ENC) {
15391 ctxt->options |= XML_PARSE_IGNORE_ENC;
15392 options -= XML_PARSE_IGNORE_ENC;
15393 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015394 if (options & XML_PARSE_BIG_LINES) {
15395 ctxt->options |= XML_PARSE_BIG_LINES;
15396 options -= XML_PARSE_BIG_LINES;
15397 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015398 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015399 return (options);
15400}
15401
15402/**
Daniel Veillard37334572008-07-31 08:20:02 +000015403 * xmlCtxtUseOptions:
15404 * @ctxt: an XML parser context
15405 * @options: a combination of xmlParserOption
15406 *
15407 * Applies the options to the parser context
15408 *
15409 * Returns 0 in case of success, the set of unknown or unimplemented options
15410 * in case of error.
15411 */
15412int
15413xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15414{
15415 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15416}
15417
15418/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015419 * xmlDoRead:
15420 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015421 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015422 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015423 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015424 * @reuse: keep the context for reuse
15425 *
15426 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015427 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428 * Returns the resulting document tree or NULL
15429 */
15430static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015431xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15432 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015433{
15434 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015435
15436 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015437 if (encoding != NULL) {
15438 xmlCharEncodingHandlerPtr hdlr;
15439
15440 hdlr = xmlFindCharEncodingHandler(encoding);
15441 if (hdlr != NULL)
15442 xmlSwitchToEncoding(ctxt, hdlr);
15443 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015444 if ((URL != NULL) && (ctxt->input != NULL) &&
15445 (ctxt->input->filename == NULL))
15446 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015447 xmlParseDocument(ctxt);
15448 if ((ctxt->wellFormed) || ctxt->recovery)
15449 ret = ctxt->myDoc;
15450 else {
15451 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015452 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015453 xmlFreeDoc(ctxt->myDoc);
15454 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015455 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015456 ctxt->myDoc = NULL;
15457 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015458 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015459 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015460
15461 return (ret);
15462}
15463
15464/**
15465 * xmlReadDoc:
15466 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015467 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015468 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015469 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015470 *
15471 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015472 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015473 * Returns the resulting document tree
15474 */
15475xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015476xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015477{
15478 xmlParserCtxtPtr ctxt;
15479
15480 if (cur == NULL)
15481 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015482 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015483
15484 ctxt = xmlCreateDocParserCtxt(cur);
15485 if (ctxt == NULL)
15486 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015487 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015488}
15489
15490/**
15491 * xmlReadFile:
15492 * @filename: a file or URL
15493 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015494 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015495 *
15496 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015497 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015498 * Returns the resulting document tree
15499 */
15500xmlDocPtr
15501xmlReadFile(const char *filename, const char *encoding, int options)
15502{
15503 xmlParserCtxtPtr ctxt;
15504
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015505 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015506 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015507 if (ctxt == NULL)
15508 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015509 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015510}
15511
15512/**
15513 * xmlReadMemory:
15514 * @buffer: a pointer to a char array
15515 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015516 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015517 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015518 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015519 *
15520 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015521 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015522 * Returns the resulting document tree
15523 */
15524xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015525xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015526{
15527 xmlParserCtxtPtr ctxt;
15528
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015529 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015530 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15531 if (ctxt == NULL)
15532 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015533 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015534}
15535
15536/**
15537 * xmlReadFd:
15538 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015539 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015540 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015541 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015542 *
15543 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015544 * NOTE that the file descriptor will not be closed when the
15545 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015546 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015547 * Returns the resulting document tree
15548 */
15549xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015550xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015551{
15552 xmlParserCtxtPtr ctxt;
15553 xmlParserInputBufferPtr input;
15554 xmlParserInputPtr stream;
15555
15556 if (fd < 0)
15557 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015558 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015559
15560 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15561 if (input == NULL)
15562 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015563 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015564 ctxt = xmlNewParserCtxt();
15565 if (ctxt == NULL) {
15566 xmlFreeParserInputBuffer(input);
15567 return (NULL);
15568 }
15569 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15570 if (stream == NULL) {
15571 xmlFreeParserInputBuffer(input);
15572 xmlFreeParserCtxt(ctxt);
15573 return (NULL);
15574 }
15575 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015576 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015577}
15578
15579/**
15580 * xmlReadIO:
15581 * @ioread: an I/O read function
15582 * @ioclose: an I/O close function
15583 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015584 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015585 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015586 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015587 *
15588 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015589 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015590 * Returns the resulting document tree
15591 */
15592xmlDocPtr
15593xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015594 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015595{
15596 xmlParserCtxtPtr ctxt;
15597 xmlParserInputBufferPtr input;
15598 xmlParserInputPtr stream;
15599
15600 if (ioread == NULL)
15601 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015602 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015603
15604 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15605 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015606 if (input == NULL) {
15607 if (ioclose != NULL)
15608 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015609 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015610 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015611 ctxt = xmlNewParserCtxt();
15612 if (ctxt == NULL) {
15613 xmlFreeParserInputBuffer(input);
15614 return (NULL);
15615 }
15616 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15617 if (stream == NULL) {
15618 xmlFreeParserInputBuffer(input);
15619 xmlFreeParserCtxt(ctxt);
15620 return (NULL);
15621 }
15622 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015623 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015624}
15625
15626/**
15627 * xmlCtxtReadDoc:
15628 * @ctxt: an XML parser context
15629 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015630 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015631 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015632 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015633 *
15634 * parse an XML in-memory document and build a tree.
15635 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015636 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015637 * Returns the resulting document tree
15638 */
15639xmlDocPtr
15640xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015641 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015642{
15643 xmlParserInputPtr stream;
15644
15645 if (cur == NULL)
15646 return (NULL);
15647 if (ctxt == NULL)
15648 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015649 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015650
15651 xmlCtxtReset(ctxt);
15652
15653 stream = xmlNewStringInputStream(ctxt, cur);
15654 if (stream == NULL) {
15655 return (NULL);
15656 }
15657 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015658 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015659}
15660
15661/**
15662 * xmlCtxtReadFile:
15663 * @ctxt: an XML parser context
15664 * @filename: a file or URL
15665 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015666 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015667 *
15668 * parse an XML file from the filesystem or the network.
15669 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015670 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015671 * Returns the resulting document tree
15672 */
15673xmlDocPtr
15674xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15675 const char *encoding, int options)
15676{
15677 xmlParserInputPtr stream;
15678
15679 if (filename == NULL)
15680 return (NULL);
15681 if (ctxt == NULL)
15682 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015683 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015684
15685 xmlCtxtReset(ctxt);
15686
Daniel Veillard29614c72004-11-26 10:47:26 +000015687 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015688 if (stream == NULL) {
15689 return (NULL);
15690 }
15691 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015692 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015693}
15694
15695/**
15696 * xmlCtxtReadMemory:
15697 * @ctxt: an XML parser context
15698 * @buffer: a pointer to a char array
15699 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015700 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015701 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015702 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015703 *
15704 * parse an XML in-memory document and build a tree.
15705 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015706 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015707 * Returns the resulting document tree
15708 */
15709xmlDocPtr
15710xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015711 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015712{
15713 xmlParserInputBufferPtr input;
15714 xmlParserInputPtr stream;
15715
15716 if (ctxt == NULL)
15717 return (NULL);
15718 if (buffer == NULL)
15719 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015720 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015721
15722 xmlCtxtReset(ctxt);
15723
15724 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15725 if (input == NULL) {
15726 return(NULL);
15727 }
15728
15729 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15730 if (stream == NULL) {
15731 xmlFreeParserInputBuffer(input);
15732 return(NULL);
15733 }
15734
15735 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015736 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015737}
15738
15739/**
15740 * xmlCtxtReadFd:
15741 * @ctxt: an XML parser context
15742 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015743 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015744 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015745 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015746 *
15747 * parse an XML from a file descriptor and build a tree.
15748 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015749 * NOTE that the file descriptor will not be closed when the
15750 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015751 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015752 * Returns the resulting document tree
15753 */
15754xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015755xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15756 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015757{
15758 xmlParserInputBufferPtr input;
15759 xmlParserInputPtr stream;
15760
15761 if (fd < 0)
15762 return (NULL);
15763 if (ctxt == NULL)
15764 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015765 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015766
15767 xmlCtxtReset(ctxt);
15768
15769
15770 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15771 if (input == NULL)
15772 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015773 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015774 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15775 if (stream == NULL) {
15776 xmlFreeParserInputBuffer(input);
15777 return (NULL);
15778 }
15779 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015780 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015781}
15782
15783/**
15784 * xmlCtxtReadIO:
15785 * @ctxt: an XML parser context
15786 * @ioread: an I/O read function
15787 * @ioclose: an I/O close function
15788 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015789 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015790 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015791 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015792 *
15793 * parse an XML document from I/O functions and source and build a tree.
15794 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015795 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015796 * Returns the resulting document tree
15797 */
15798xmlDocPtr
15799xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15800 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015801 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015802 const char *encoding, int options)
15803{
15804 xmlParserInputBufferPtr input;
15805 xmlParserInputPtr stream;
15806
15807 if (ioread == NULL)
15808 return (NULL);
15809 if (ctxt == NULL)
15810 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015811 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015812
15813 xmlCtxtReset(ctxt);
15814
15815 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15816 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015817 if (input == NULL) {
15818 if (ioclose != NULL)
15819 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015820 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015821 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015822 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15823 if (stream == NULL) {
15824 xmlFreeParserInputBuffer(input);
15825 return (NULL);
15826 }
15827 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015828 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015829}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015830
15831#define bottom_parser
15832#include "elfgcchack.h"