/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080097static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

/*
 * XML_PARSER_BIG_ENTITY is the size under which xmlParserEntityCheck()
 * accepts an entity without looking at the expansion ratio.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
115
116/*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800127 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000128{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800129 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillardbdd66182016-05-23 12:27:58 +0800141 (ent->content != NULL) && (ent->checked == 0) &&
142 (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800143 unsigned long oldnbent = ctxt->nbentities;
144 xmlChar *rep;
145
146 ent->checked = 1;
147
Peter Simons8f30bdf2016-04-15 11:56:55 +0200148 ++ctxt->depth;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800149 rep = xmlStringDecodeEntities(ctxt, ent->content,
150 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +0200151 --ctxt->depth;
Daniel Veillardbdd66182016-05-23 12:27:58 +0800152 if (ctxt->errNo == XML_ERR_ENTITY_LOOP) {
153 ent->content[0] = 0;
154 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800155
156 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
157 if (rep != NULL) {
158 if (xmlStrchr(rep, '<'))
159 ent->checked |= 1;
160 xmlFree(rep);
161 rep = NULL;
162 }
163 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800164 if (replacement != 0) {
165 if (replacement < XML_MAX_TEXT_LENGTH)
166 return(0);
167
168 /*
169 * If the volume of entity copy reaches 10 times the
170 * amount of parsed data and over the large text threshold
171 * then that's very likely to be an abuse.
172 */
173 if (ctxt->input != NULL) {
174 consumed = ctxt->input->consumed +
175 (ctxt->input->cur - ctxt->input->base);
176 }
177 consumed += ctxt->sizeentities;
178
179 if (replacement < XML_PARSER_NON_LINEAR * consumed)
180 return(0);
181 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000182 /*
183 * Do the check based on the replacement size of the entity
184 */
185 if (size < XML_PARSER_BIG_ENTITY)
186 return(0);
187
188 /*
189 * A limit on the amount of text data reasonably used
190 */
191 if (ctxt->input != NULL) {
192 consumed = ctxt->input->consumed +
193 (ctxt->input->cur - ctxt->input->base);
194 }
195 consumed += ctxt->sizeentities;
196
197 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
198 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
199 return (0);
200 } else if (ent != NULL) {
201 /*
202 * use the number of parsed entities in the replacement
203 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800204 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000205
206 /*
207 * The amount of data parsed counting entities size only once
208 */
209 if (ctxt->input != NULL) {
210 consumed = ctxt->input->consumed +
211 (ctxt->input->cur - ctxt->input->base);
212 }
213 consumed += ctxt->sizeentities;
214
215 /*
216 * Check the density of entities for the amount of data
217 * knowing an entity reference will take at least 3 bytes
218 */
219 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
220 return (0);
221 } else {
222 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800223 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000224 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800225 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
226 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
227 (ctxt->nbentities <= 10000))
228 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000229 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000230 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
231 return (1);
232}
233
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000243
Daniel Veillard0fb18932003-09-07 09:14:37 +0000244
Daniel Veillard0161e632008-08-28 15:36:32 +0000245
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
261
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The list is terminated by a NULL entry.
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
271
Daniel Veillarda07050d2003-10-19 14:46:32 +0000272
Owen Taylor3473f882001-02-23 17:55:21 +0000273/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200274static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
275 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000276
Daniel Veillard7d515752003-09-26 19:12:37 +0000277static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000278xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
279 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000280 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000281 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000282
Daniel Veillard37334572008-07-31 08:20:02 +0000283static int
284xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
285 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000286#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000287static void
288xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
289 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000290#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000291
Daniel Veillard7d515752003-09-26 19:12:37 +0000292static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000293xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
294 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000295
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000296static int
297xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
298
Daniel Veillarde57ec792003-09-10 10:50:59 +0000299/************************************************************************
300 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800301 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000302 * *
303 ************************************************************************/
304
305/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 * xmlErrAttributeDup:
307 * @ctxt: an XML parser context
308 * @prefix: the attribute prefix
309 * @localname: the attribute localname
310 *
311 * Handle a redefinition of attribute error
312 */
313static void
314xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
315 const xmlChar * localname)
316{
Daniel Veillard157fee02003-10-31 10:36:03 +0000317 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
318 (ctxt->instate == XML_PARSER_EOF))
319 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000320 if (ctxt != NULL)
321 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200322
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000323 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000324 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200325 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 (const char *) localname, NULL, NULL, 0, 0,
327 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000328 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000329 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200330 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 (const char *) prefix, (const char *) localname,
332 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
333 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000334 if (ctxt != NULL) {
335 ctxt->wellFormed = 0;
336 if (ctxt->recovery == 0)
337 ctxt->disableSAX = 1;
338 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339}
340
341/**
342 * xmlFatalErr:
343 * @ctxt: an XML parser context
344 * @error: the error number
345 * @extra: extra information string
346 *
347 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
348 */
349static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351{
352 const char *errmsg;
353
Daniel Veillard157fee02003-10-31 10:36:03 +0000354 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
355 (ctxt->instate == XML_PARSER_EOF))
356 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 switch (error) {
358 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 errmsg = "internal error";
369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800404 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800428 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000430 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800431 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000433 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800434 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000436 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000437 errmsg = "Fragment not allowed";
438 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000439 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800440 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000442 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800443 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000445 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800446 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000448 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800449 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000451 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800452 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000454 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800455 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000457 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800458 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000460 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800462 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000464 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800465 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
473 case XML_ERR_CONDSEC_INVALID_KEYWORD:
474 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000492 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000495 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800499 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000500 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000501 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800502 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800505 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000506 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000507 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800508 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000509 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000510 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800511 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000512 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000513 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800514 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000516 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800517 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000518 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000519 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800520 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000521 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000522 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800523 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000524 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000525 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800526 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800528 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800529 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800530 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000531#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000532 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800533 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000534 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000535#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000536 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800537 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000538 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000539 if (ctxt != NULL)
540 ctxt->errNo = error;
David Kilzer4472c3a2016-05-13 15:13:17 +0800541 if (info == NULL) {
542 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
544 errmsg);
545 } else {
546 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
547 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
548 errmsg, info);
549 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000550 if (ctxt != NULL) {
551 ctxt->wellFormed = 0;
552 if (ctxt->recovery == 0)
553 ctxt->disableSAX = 1;
554 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000555}
556
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000557/**
558 * xmlFatalErrMsg:
559 * @ctxt: an XML parser context
560 * @error: the error number
561 * @msg: the error message
562 *
563 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
564 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800565static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000566xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
567 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000568{
Daniel Veillard157fee02003-10-31 10:36:03 +0000569 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
570 (ctxt->instate == XML_PARSER_EOF))
571 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000572 if (ctxt != NULL)
573 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000574 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200575 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000576 if (ctxt != NULL) {
577 ctxt->wellFormed = 0;
578 if (ctxt->recovery == 0)
579 ctxt->disableSAX = 1;
580 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581}
582
583/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000584 * xmlWarningMsg:
585 * @ctxt: an XML parser context
586 * @error: the error number
587 * @msg: the error message
588 * @str1: extra data
589 * @str2: extra data
590 *
591 * Handle a warning.
592 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800593static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000594xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595 const char *msg, const xmlChar *str1, const xmlChar *str2)
596{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000597 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000598
Daniel Veillard157fee02003-10-31 10:36:03 +0000599 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
600 (ctxt->instate == XML_PARSER_EOF))
601 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000602 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
603 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000604 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200605 if (ctxt != NULL) {
606 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000607 (ctxt->sax) ? ctxt->sax->warning : NULL,
608 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000609 ctxt, NULL, XML_FROM_PARSER, error,
610 XML_ERR_WARNING, NULL, 0,
611 (const char *) str1, (const char *) str2, NULL, 0, 0,
612 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200613 } else {
614 __xmlRaiseError(schannel, NULL, NULL,
615 ctxt, NULL, XML_FROM_PARSER, error,
616 XML_ERR_WARNING, NULL, 0,
617 (const char *) str1, (const char *) str2, NULL, 0, 0,
618 msg, (const char *) str1, (const char *) str2);
619 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000620}
621
622/**
623 * xmlValidityError:
624 * @ctxt: an XML parser context
625 * @error: the error number
626 * @msg: the error message
627 * @str1: extra data
628 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000629 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000630 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800631static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000632xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000633 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000634{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000635 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000636
637 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
638 (ctxt->instate == XML_PARSER_EOF))
639 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000640 if (ctxt != NULL) {
641 ctxt->errNo = error;
642 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
643 schannel = ctxt->sax->serror;
644 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200645 if (ctxt != NULL) {
646 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000652 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200653 } else {
654 __xmlRaiseError(schannel, NULL, NULL,
655 ctxt, NULL, XML_FROM_DTD, error,
656 XML_ERR_ERROR, NULL, 0, (const char *) str1,
657 (const char *) str2, NULL, 0, 0,
658 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000659 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000660}
661
662/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000663 * xmlFatalErrMsgInt:
664 * @ctxt: an XML parser context
665 * @error: the error number
666 * @msg: the error message
667 * @val: an integer value
668 *
669 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
670 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800671static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000672xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000673 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000674{
Daniel Veillard157fee02003-10-31 10:36:03 +0000675 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
676 (ctxt->instate == XML_PARSER_EOF))
677 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000678 if (ctxt != NULL)
679 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000680 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000681 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
682 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000683 if (ctxt != NULL) {
684 ctxt->wellFormed = 0;
685 if (ctxt->recovery == 0)
686 ctxt->disableSAX = 1;
687 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000688}
689
690/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000691 * xmlFatalErrMsgStrIntStr:
692 * @ctxt: an XML parser context
693 * @error: the error number
694 * @msg: the error message
695 * @str1: an string info
696 * @val: an integer value
697 * @str2: an string info
698 *
699 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
700 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800701static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000702xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800703 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000704 const xmlChar *str2)
705{
Daniel Veillard157fee02003-10-31 10:36:03 +0000706 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
707 (ctxt->instate == XML_PARSER_EOF))
708 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL)
710 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000711 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000712 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
713 NULL, 0, (const char *) str1, (const char *) str2,
714 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000715 if (ctxt != NULL) {
716 ctxt->wellFormed = 0;
717 if (ctxt->recovery == 0)
718 ctxt->disableSAX = 1;
719 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000720}
721
722/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000723 * xmlFatalErrMsgStr:
724 * @ctxt: an XML parser context
725 * @error: the error number
726 * @msg: the error message
727 * @val: a string value
728 *
729 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
730 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800731static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000732xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000733 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000734{
Daniel Veillard157fee02003-10-31 10:36:03 +0000735 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
736 (ctxt->instate == XML_PARSER_EOF))
737 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000738 if (ctxt != NULL)
739 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000740 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000741 XML_FROM_PARSER, error, XML_ERR_FATAL,
742 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
743 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000744 if (ctxt != NULL) {
745 ctxt->wellFormed = 0;
746 if (ctxt->recovery == 0)
747 ctxt->disableSAX = 1;
748 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000749}
750
751/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000752 * xmlErrMsgStr:
753 * @ctxt: an XML parser context
754 * @error: the error number
755 * @msg: the error message
756 * @val: a string value
757 *
758 * Handle a non fatal parser error
759 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800760static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardf403d292003-10-05 13:51:35 +0000761xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
762 const char *msg, const xmlChar * val)
763{
Daniel Veillard157fee02003-10-31 10:36:03 +0000764 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
765 (ctxt->instate == XML_PARSER_EOF))
766 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000767 if (ctxt != NULL)
768 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000769 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000770 XML_FROM_PARSER, error, XML_ERR_ERROR,
771 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
772 val);
773}
774
775/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000776 * xmlNsErr:
777 * @ctxt: an XML parser context
778 * @error: the error number
779 * @msg: the message
780 * @info1: extra information string
781 * @info2: extra information string
782 *
783 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
784 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800785static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000786xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
787 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000788 const xmlChar * info1, const xmlChar * info2,
789 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000790{
Daniel Veillard157fee02003-10-31 10:36:03 +0000791 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
792 (ctxt->instate == XML_PARSER_EOF))
793 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000794 if (ctxt != NULL)
795 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000796 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000797 XML_ERR_ERROR, NULL, 0, (const char *) info1,
798 (const char *) info2, (const char *) info3, 0, 0, msg,
799 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000800 if (ctxt != NULL)
801 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000802}
803
Daniel Veillard37334572008-07-31 08:20:02 +0000804/**
805 * xmlNsWarn
806 * @ctxt: an XML parser context
807 * @error: the error number
808 * @msg: the message
809 * @info1: extra information string
810 * @info2: extra information string
811 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800812 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000813 */
David Kilzer4472c3a2016-05-13 15:13:17 +0800814static void LIBXML_ATTR_FORMAT(3,0)
Daniel Veillard37334572008-07-31 08:20:02 +0000815xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
816 const char *msg,
817 const xmlChar * info1, const xmlChar * info2,
818 const xmlChar * info3)
819{
820 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
821 (ctxt->instate == XML_PARSER_EOF))
822 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000823 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
824 XML_ERR_WARNING, NULL, 0, (const char *) info1,
825 (const char *) info2, (const char *) info3, 0, 0, msg,
826 info1, info2, info3);
827}
828
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000829/************************************************************************
830 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800831 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000832 * *
833 ************************************************************************/
834
835/**
836 * xmlHasFeature:
837 * @feature: the feature to be examined
838 *
839 * Examines if the library has been compiled with a given feature.
840 *
841 * Returns a non-zero value if the feature exist, otherwise zero.
842 * Returns zero (0) if the feature does not exist or an unknown
843 * unknown feature is requested, non-zero otherwise.
844 */
845int
846xmlHasFeature(xmlFeature feature)
847{
848 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000849 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850#ifdef LIBXML_THREAD_ENABLED
851 return(1);
852#else
853 return(0);
854#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000855 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000856#ifdef LIBXML_TREE_ENABLED
857 return(1);
858#else
859 return(0);
860#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000861 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000862#ifdef LIBXML_OUTPUT_ENABLED
863 return(1);
864#else
865 return(0);
866#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000867 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000868#ifdef LIBXML_PUSH_ENABLED
869 return(1);
870#else
871 return(0);
872#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000873 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000874#ifdef LIBXML_READER_ENABLED
875 return(1);
876#else
877 return(0);
878#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000879 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000880#ifdef LIBXML_PATTERN_ENABLED
881 return(1);
882#else
883 return(0);
884#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000885 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000886#ifdef LIBXML_WRITER_ENABLED
887 return(1);
888#else
889 return(0);
890#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000891 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000892#ifdef LIBXML_SAX1_ENABLED
893 return(1);
894#else
895 return(0);
896#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000897 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000898#ifdef LIBXML_FTP_ENABLED
899 return(1);
900#else
901 return(0);
902#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000903 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000904#ifdef LIBXML_HTTP_ENABLED
905 return(1);
906#else
907 return(0);
908#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000909 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000910#ifdef LIBXML_VALID_ENABLED
911 return(1);
912#else
913 return(0);
914#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000915 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000916#ifdef LIBXML_HTML_ENABLED
917 return(1);
918#else
919 return(0);
920#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000921 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000922#ifdef LIBXML_LEGACY_ENABLED
923 return(1);
924#else
925 return(0);
926#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000927 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000928#ifdef LIBXML_C14N_ENABLED
929 return(1);
930#else
931 return(0);
932#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000933 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000934#ifdef LIBXML_CATALOG_ENABLED
935 return(1);
936#else
937 return(0);
938#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000939 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000940#ifdef LIBXML_XPATH_ENABLED
941 return(1);
942#else
943 return(0);
944#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000945 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000946#ifdef LIBXML_XPTR_ENABLED
947 return(1);
948#else
949 return(0);
950#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000951 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000952#ifdef LIBXML_XINCLUDE_ENABLED
953 return(1);
954#else
955 return(0);
956#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000957 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000958#ifdef LIBXML_ICONV_ENABLED
959 return(1);
960#else
961 return(0);
962#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000963 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000964#ifdef LIBXML_ISO8859X_ENABLED
965 return(1);
966#else
967 return(0);
968#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000969 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000970#ifdef LIBXML_UNICODE_ENABLED
971 return(1);
972#else
973 return(0);
974#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000975 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000976#ifdef LIBXML_REGEXP_ENABLED
977 return(1);
978#else
979 return(0);
980#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000981 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000982#ifdef LIBXML_AUTOMATA_ENABLED
983 return(1);
984#else
985 return(0);
986#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000987 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000988#ifdef LIBXML_EXPR_ENABLED
989 return(1);
990#else
991 return(0);
992#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000993 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000994#ifdef LIBXML_SCHEMAS_ENABLED
995 return(1);
996#else
997 return(0);
998#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000999 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001000#ifdef LIBXML_SCHEMATRON_ENABLED
1001 return(1);
1002#else
1003 return(0);
1004#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001005 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001006#ifdef LIBXML_MODULES_ENABLED
1007 return(1);
1008#else
1009 return(0);
1010#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001011 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012#ifdef LIBXML_DEBUG_ENABLED
1013 return(1);
1014#else
1015 return(0);
1016#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001017 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001018#ifdef DEBUG_MEMORY_LOCATION
1019 return(1);
1020#else
1021 return(0);
1022#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001023 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001024#ifdef LIBXML_DEBUG_RUNTIME
1025 return(1);
1026#else
1027 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001028#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001029 case XML_WITH_ZLIB:
1030#ifdef LIBXML_ZLIB_ENABLED
1031 return(1);
1032#else
1033 return(0);
1034#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001035 case XML_WITH_LZMA:
1036#ifdef LIBXML_LZMA_ENABLED
1037 return(1);
1038#else
1039 return(0);
1040#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001041 case XML_WITH_ICU:
1042#ifdef LIBXML_ICU_ENABLED
1043 return(1);
1044#else
1045 return(0);
1046#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001047 default:
1048 break;
1049 }
1050 return(0);
1051}
1052
1053/************************************************************************
1054 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001055 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 * *
1057 ************************************************************************/
1058
1059/**
1060 * xmlDetectSAX2:
1061 * @ctxt: an XML parser context
1062 *
1063 * Do the SAX2 detection and specific intialization
1064 */
1065static void
1066xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1067 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001068#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001069 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1070 ((ctxt->sax->startElementNs != NULL) ||
1071 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001072#else
1073 ctxt->sax2 = 1;
1074#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001075
1076 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1077 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1078 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001079 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1080 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001081 xmlErrMemory(ctxt, NULL);
1082 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001083}
1084
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085typedef struct _xmlDefAttrs xmlDefAttrs;
1086typedef xmlDefAttrs *xmlDefAttrsPtr;
1087struct _xmlDefAttrs {
1088 int nbAttrs; /* number of defaulted attributes on that element */
1089 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001090 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001091};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001092
1093/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001094 * xmlAttrNormalizeSpace:
1095 * @src: the source string
1096 * @dst: the target string
1097 *
1098 * Normalize the space in non CDATA attribute values:
1099 * If the attribute type is not CDATA, then the XML processor MUST further
1100 * process the normalized attribute value by discarding any leading and
1101 * trailing space (#x20) characters, and by replacing sequences of space
1102 * (#x20) characters by a single space (#x20) character.
1103 * Note that the size of dst need to be at least src, and if one doesn't need
1104 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1105 * passing src as dst is just fine.
1106 *
1107 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1108 * is needed.
1109 */
1110static xmlChar *
1111xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1112{
1113 if ((src == NULL) || (dst == NULL))
1114 return(NULL);
1115
1116 while (*src == 0x20) src++;
1117 while (*src != 0) {
1118 if (*src == 0x20) {
1119 while (*src == 0x20) src++;
1120 if (*src != 0)
1121 *dst++ = 0x20;
1122 } else {
1123 *dst++ = *src++;
1124 }
1125 }
1126 *dst = 0;
1127 if (dst == src)
1128 return(NULL);
1129 return(dst);
1130}
1131
1132/**
1133 * xmlAttrNormalizeSpace2:
1134 * @src: the source string
1135 *
1136 * Normalize the space in non CDATA attribute values, a slightly more complex
1137 * front end to avoid allocation problems when running on attribute values
1138 * coming from the input.
1139 *
1140 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1141 * is needed.
1142 */
1143static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001144xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001145{
1146 int i;
1147 int remove_head = 0;
1148 int need_realloc = 0;
1149 const xmlChar *cur;
1150
1151 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1152 return(NULL);
1153 i = *len;
1154 if (i <= 0)
1155 return(NULL);
1156
1157 cur = src;
1158 while (*cur == 0x20) {
1159 cur++;
1160 remove_head++;
1161 }
1162 while (*cur != 0) {
1163 if (*cur == 0x20) {
1164 cur++;
1165 if ((*cur == 0x20) || (*cur == 0)) {
1166 need_realloc = 1;
1167 break;
1168 }
1169 } else
1170 cur++;
1171 }
1172 if (need_realloc) {
1173 xmlChar *ret;
1174
1175 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1176 if (ret == NULL) {
1177 xmlErrMemory(ctxt, NULL);
1178 return(NULL);
1179 }
1180 xmlAttrNormalizeSpace(ret, ret);
1181 *len = (int) strlen((const char *)ret);
1182 return(ret);
1183 } else if (remove_head) {
1184 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001185 memmove(src, src + remove_head, 1 + *len);
1186 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001187 }
1188 return(NULL);
1189}
1190
1191/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 * xmlAddDefAttrs:
1193 * @ctxt: an XML parser context
1194 * @fullname: the element fullname
1195 * @fullattr: the attribute fullname
1196 * @value: the attribute value
1197 *
1198 * Add a defaulted attribute for an element
1199 */
1200static void
1201xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1202 const xmlChar *fullname,
1203 const xmlChar *fullattr,
1204 const xmlChar *value) {
1205 xmlDefAttrsPtr defaults;
1206 int len;
1207 const xmlChar *name;
1208 const xmlChar *prefix;
1209
Daniel Veillard6a31b832008-03-26 14:06:44 +00001210 /*
1211 * Allows to detect attribute redefinitions
1212 */
1213 if (ctxt->attsSpecial != NULL) {
1214 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1215 return;
1216 }
1217
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001219 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001220 if (ctxt->attsDefault == NULL)
1221 goto mem_error;
1222 }
1223
1224 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 * split the element name into prefix:localname , the string found
1226 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001227 */
1228 name = xmlSplitQName3(fullname, &len);
1229 if (name == NULL) {
1230 name = xmlDictLookup(ctxt->dict, fullname, -1);
1231 prefix = NULL;
1232 } else {
1233 name = xmlDictLookup(ctxt->dict, name, -1);
1234 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1235 }
1236
1237 /*
1238 * make sure there is some storage
1239 */
1240 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1241 if (defaults == NULL) {
1242 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001243 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001244 if (defaults == NULL)
1245 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001246 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001247 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001248 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1249 defaults, NULL) < 0) {
1250 xmlFree(defaults);
1251 goto mem_error;
1252 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001253 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001254 xmlDefAttrsPtr temp;
1255
1256 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001257 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001258 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001259 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001260 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001261 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001262 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1263 defaults, NULL) < 0) {
1264 xmlFree(defaults);
1265 goto mem_error;
1266 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001267 }
1268
1269 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001270 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001271 * are within the DTD and hen not associated to namespace names.
1272 */
1273 name = xmlSplitQName3(fullattr, &len);
1274 if (name == NULL) {
1275 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1276 prefix = NULL;
1277 } else {
1278 name = xmlDictLookup(ctxt->dict, name, -1);
1279 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1280 }
1281
Daniel Veillardae0765b2008-07-31 19:54:59 +00001282 defaults->values[5 * defaults->nbAttrs] = name;
1283 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001284 /* intern the string and precompute the end */
1285 len = xmlStrlen(value);
1286 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001287 defaults->values[5 * defaults->nbAttrs + 2] = value;
1288 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1289 if (ctxt->external)
1290 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1291 else
1292 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001293 defaults->nbAttrs++;
1294
1295 return;
1296
1297mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001298 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001299 return;
1300}
1301
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001302/**
1303 * xmlAddSpecialAttr:
1304 * @ctxt: an XML parser context
1305 * @fullname: the element fullname
1306 * @fullattr: the attribute fullname
1307 * @type: the attribute type
1308 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001309 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001310 */
1311static void
1312xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1313 const xmlChar *fullname,
1314 const xmlChar *fullattr,
1315 int type)
1316{
1317 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001318 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001319 if (ctxt->attsSpecial == NULL)
1320 goto mem_error;
1321 }
1322
Daniel Veillardac4118d2008-01-11 05:27:32 +00001323 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1324 return;
1325
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001326 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1327 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001328 return;
1329
1330mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001331 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001332 return;
1333}
1334
Daniel Veillard4432df22003-09-28 18:58:27 +00001335/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001336 * xmlCleanSpecialAttrCallback:
1337 *
1338 * Removes CDATA attributes from the special attribute table
1339 */
1340static void
1341xmlCleanSpecialAttrCallback(void *payload, void *data,
1342 const xmlChar *fullname, const xmlChar *fullattr,
1343 const xmlChar *unused ATTRIBUTE_UNUSED) {
1344 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1345
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001346 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001347 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1348 }
1349}
1350
1351/**
1352 * xmlCleanSpecialAttr:
1353 * @ctxt: an XML parser context
1354 *
1355 * Trim the list of attributes defined to remove all those of type
1356 * CDATA as they are not special. This call should be done when finishing
1357 * to parse the DTD and before starting to parse the document root.
1358 */
1359static void
1360xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1361{
1362 if (ctxt->attsSpecial == NULL)
1363 return;
1364
1365 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1366
1367 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1368 xmlHashFree(ctxt->attsSpecial, NULL);
1369 ctxt->attsSpecial = NULL;
1370 }
1371 return;
1372}
1373
1374/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001375 * xmlCheckLanguageID:
1376 * @lang: pointer to the string value
1377 *
1378 * Checks that the value conforms to the LanguageID production:
1379 *
1380 * NOTE: this is somewhat deprecated, those productions were removed from
1381 * the XML Second edition.
1382 *
1383 * [33] LanguageID ::= Langcode ('-' Subcode)*
1384 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1385 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1386 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1387 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1388 * [38] Subcode ::= ([a-z] | [A-Z])+
1389 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001390 * The current REC reference the sucessors of RFC 1766, currently 5646
1391 *
1392 * http://www.rfc-editor.org/rfc/rfc5646.txt
1393 * langtag = language
1394 * ["-" script]
1395 * ["-" region]
1396 * *("-" variant)
1397 * *("-" extension)
1398 * ["-" privateuse]
1399 * language = 2*3ALPHA ; shortest ISO 639 code
1400 * ["-" extlang] ; sometimes followed by
1401 * ; extended language subtags
1402 * / 4ALPHA ; or reserved for future use
1403 * / 5*8ALPHA ; or registered language subtag
1404 *
1405 * extlang = 3ALPHA ; selected ISO 639 codes
1406 * *2("-" 3ALPHA) ; permanently reserved
1407 *
1408 * script = 4ALPHA ; ISO 15924 code
1409 *
1410 * region = 2ALPHA ; ISO 3166-1 code
1411 * / 3DIGIT ; UN M.49 code
1412 *
1413 * variant = 5*8alphanum ; registered variants
1414 * / (DIGIT 3alphanum)
1415 *
1416 * extension = singleton 1*("-" (2*8alphanum))
1417 *
1418 * ; Single alphanumerics
1419 * ; "x" reserved for private use
1420 * singleton = DIGIT ; 0 - 9
1421 * / %x41-57 ; A - W
1422 * / %x59-5A ; Y - Z
1423 * / %x61-77 ; a - w
1424 * / %x79-7A ; y - z
1425 *
1426 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1427 * The parser below doesn't try to cope with extension or privateuse
1428 * that could be added but that's not interoperable anyway
1429 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001430 * Returns 1 if correct 0 otherwise
1431 **/
1432int
1433xmlCheckLanguageID(const xmlChar * lang)
1434{
Daniel Veillard60587d62010-11-04 15:16:27 +01001435 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001436
1437 if (cur == NULL)
1438 return (0);
1439 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001440 ((cur[0] == 'I') && (cur[1] == '-')) ||
1441 ((cur[0] == 'x') && (cur[1] == '-')) ||
1442 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001443 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001444 * Still allow IANA code and user code which were coming
1445 * from the previous version of the XML-1.0 specification
1446 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001447 */
1448 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001449 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001450 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1451 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001452 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001453 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001454 nxt = cur;
1455 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1456 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1457 nxt++;
1458 if (nxt - cur >= 4) {
1459 /*
1460 * Reserved
1461 */
1462 if ((nxt - cur > 8) || (nxt[0] != 0))
1463 return(0);
1464 return(1);
1465 }
1466 if (nxt - cur < 2)
1467 return(0);
1468 /* we got an ISO 639 code */
1469 if (nxt[0] == 0)
1470 return(1);
1471 if (nxt[0] != '-')
1472 return(0);
1473
1474 nxt++;
1475 cur = nxt;
1476 /* now we can have extlang or script or region or variant */
1477 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1478 goto region_m49;
1479
1480 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1481 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1482 nxt++;
1483 if (nxt - cur == 4)
1484 goto script;
1485 if (nxt - cur == 2)
1486 goto region;
1487 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1488 goto variant;
1489 if (nxt - cur != 3)
1490 return(0);
1491 /* we parsed an extlang */
1492 if (nxt[0] == 0)
1493 return(1);
1494 if (nxt[0] != '-')
1495 return(0);
1496
1497 nxt++;
1498 cur = nxt;
1499 /* now we can have script or region or variant */
1500 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1501 goto region_m49;
1502
1503 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1504 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1505 nxt++;
1506 if (nxt - cur == 2)
1507 goto region;
1508 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1509 goto variant;
1510 if (nxt - cur != 4)
1511 return(0);
1512 /* we parsed a script */
1513script:
1514 if (nxt[0] == 0)
1515 return(1);
1516 if (nxt[0] != '-')
1517 return(0);
1518
1519 nxt++;
1520 cur = nxt;
1521 /* now we can have region or variant */
1522 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1523 goto region_m49;
1524
1525 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1526 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1527 nxt++;
1528
1529 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1530 goto variant;
1531 if (nxt - cur != 2)
1532 return(0);
1533 /* we parsed a region */
1534region:
1535 if (nxt[0] == 0)
1536 return(1);
1537 if (nxt[0] != '-')
1538 return(0);
1539
1540 nxt++;
1541 cur = nxt;
1542 /* now we can just have a variant */
1543 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1544 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1545 nxt++;
1546
1547 if ((nxt - cur < 5) || (nxt - cur > 8))
1548 return(0);
1549
1550 /* we parsed a variant */
1551variant:
1552 if (nxt[0] == 0)
1553 return(1);
1554 if (nxt[0] != '-')
1555 return(0);
1556 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001557 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001558
1559region_m49:
1560 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1561 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1562 nxt += 3;
1563 goto region;
1564 }
1565 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001566}
1567
Owen Taylor3473f882001-02-23 17:55:21 +00001568/************************************************************************
1569 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001570 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001571 * *
1572 ************************************************************************/
1573
Daniel Veillard8ed10722009-08-20 19:17:36 +02001574static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1575 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001576
Daniel Veillard0fb18932003-09-07 09:14:37 +00001577#ifdef SAX2
1578/**
1579 * nsPush:
1580 * @ctxt: an XML parser context
1581 * @prefix: the namespace prefix or NULL
1582 * @URL: the namespace name
1583 *
1584 * Pushes a new parser namespace on top of the ns stack
1585 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001586 * Returns -1 in case of error, -2 if the namespace should be discarded
1587 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001588 */
1589static int
1590nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1591{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001592 if (ctxt->options & XML_PARSE_NSCLEAN) {
1593 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001594 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001595 if (ctxt->nsTab[i] == prefix) {
1596 /* in scope */
1597 if (ctxt->nsTab[i + 1] == URL)
1598 return(-2);
1599 /* out of scope keep it */
1600 break;
1601 }
1602 }
1603 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001604 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1605 ctxt->nsMax = 10;
1606 ctxt->nsNr = 0;
1607 ctxt->nsTab = (const xmlChar **)
1608 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1609 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001610 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001611 ctxt->nsMax = 0;
1612 return (-1);
1613 }
1614 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001615 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001616 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001617 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1618 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1619 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001620 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001621 ctxt->nsMax /= 2;
1622 return (-1);
1623 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001624 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001625 }
1626 ctxt->nsTab[ctxt->nsNr++] = prefix;
1627 ctxt->nsTab[ctxt->nsNr++] = URL;
1628 return (ctxt->nsNr);
1629}
1630/**
1631 * nsPop:
1632 * @ctxt: an XML parser context
1633 * @nr: the number to pop
1634 *
1635 * Pops the top @nr parser prefix/namespace from the ns stack
1636 *
1637 * Returns the number of namespaces removed
1638 */
1639static int
1640nsPop(xmlParserCtxtPtr ctxt, int nr)
1641{
1642 int i;
1643
1644 if (ctxt->nsTab == NULL) return(0);
1645 if (ctxt->nsNr < nr) {
1646 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1647 nr = ctxt->nsNr;
1648 }
1649 if (ctxt->nsNr <= 0)
1650 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001651
Daniel Veillard0fb18932003-09-07 09:14:37 +00001652 for (i = 0;i < nr;i++) {
1653 ctxt->nsNr--;
1654 ctxt->nsTab[ctxt->nsNr] = NULL;
1655 }
1656 return(nr);
1657}
1658#endif
1659
1660static int
1661xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1662 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001663 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001664 int maxatts;
1665
1666 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001667 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001668 atts = (const xmlChar **)
1669 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001670 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001671 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001672 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1673 if (attallocs == NULL) goto mem_error;
1674 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001675 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001676 } else if (nr + 5 > ctxt->maxatts) {
1677 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001678 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1679 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001680 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001681 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001682 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1683 (maxatts / 5) * sizeof(int));
1684 if (attallocs == NULL) goto mem_error;
1685 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001686 ctxt->maxatts = maxatts;
1687 }
1688 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001689mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001690 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001691 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001692}
1693
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001694/**
1695 * inputPush:
1696 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001697 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 *
1699 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001700 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001701 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001702 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001703int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001704inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1705{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001706 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001707 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 if (ctxt->inputNr >= ctxt->inputMax) {
1709 ctxt->inputMax *= 2;
1710 ctxt->inputTab =
1711 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712 ctxt->inputMax *
1713 sizeof(ctxt->inputTab[0]));
1714 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001715 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001716 xmlFreeInputStream(value);
1717 ctxt->inputMax /= 2;
1718 value = NULL;
1719 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001720 }
1721 }
1722 ctxt->inputTab[ctxt->inputNr] = value;
1723 ctxt->input = value;
1724 return (ctxt->inputNr++);
1725}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001726/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001728 * @ctxt: an XML parser context
1729 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001731 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001732 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001733 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001734xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735inputPop(xmlParserCtxtPtr ctxt)
1736{
1737 xmlParserInputPtr ret;
1738
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001739 if (ctxt == NULL)
1740 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001742 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001743 ctxt->inputNr--;
1744 if (ctxt->inputNr > 0)
1745 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1746 else
1747 ctxt->input = NULL;
1748 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001749 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001750 return (ret);
1751}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001752/**
1753 * nodePush:
1754 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001755 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001756 *
1757 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001758 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001759 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001760 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001761int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001762nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1763{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001764 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001766 xmlNodePtr *tmp;
1767
1768 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001771 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001772 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001773 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001774 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001775 ctxt->nodeTab = tmp;
1776 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001777 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001778 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001780 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001781 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001782 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001783 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001784 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001785 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001786 ctxt->nodeTab[ctxt->nodeNr] = value;
1787 ctxt->node = value;
1788 return (ctxt->nodeNr++);
1789}
Daniel Veillard8915c152008-08-26 13:05:34 +00001790
Daniel Veillard1c732d22002-11-30 11:22:59 +00001791/**
1792 * nodePop:
1793 * @ctxt: an XML parser context
1794 *
1795 * Pops the top element node from the node stack
1796 *
1797 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001798 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001799xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001800nodePop(xmlParserCtxtPtr ctxt)
1801{
1802 xmlNodePtr ret;
1803
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001804 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001805 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001806 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001807 ctxt->nodeNr--;
1808 if (ctxt->nodeNr > 0)
1809 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1810 else
1811 ctxt->node = NULL;
1812 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001813 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001814 return (ret);
1815}
Daniel Veillarda2351322004-06-27 12:08:10 +00001816
1817#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001819 * nameNsPush:
1820 * @ctxt: an XML parser context
1821 * @value: the element name
1822 * @prefix: the element prefix
1823 * @URI: the element namespace name
1824 *
1825 * Pushes a new element name/prefix/URL on top of the name stack
1826 *
1827 * Returns -1 in case of error, the index in the stack otherwise
1828 */
1829static int
1830nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1831 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1832{
1833 if (ctxt->nameNr >= ctxt->nameMax) {
1834 const xmlChar * *tmp;
1835 void **tmp2;
1836 ctxt->nameMax *= 2;
1837 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1838 ctxt->nameMax *
1839 sizeof(ctxt->nameTab[0]));
1840 if (tmp == NULL) {
1841 ctxt->nameMax /= 2;
1842 goto mem_error;
1843 }
1844 ctxt->nameTab = tmp;
1845 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1846 ctxt->nameMax * 3 *
1847 sizeof(ctxt->pushTab[0]));
1848 if (tmp2 == NULL) {
1849 ctxt->nameMax /= 2;
1850 goto mem_error;
1851 }
1852 ctxt->pushTab = tmp2;
1853 }
1854 ctxt->nameTab[ctxt->nameNr] = value;
1855 ctxt->name = value;
1856 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1857 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001858 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001859 return (ctxt->nameNr++);
1860mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001861 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 return (-1);
1863}
1864/**
1865 * nameNsPop:
1866 * @ctxt: an XML parser context
1867 *
1868 * Pops the top element/prefix/URI name from the name stack
1869 *
1870 * Returns the name just removed
1871 */
1872static const xmlChar *
1873nameNsPop(xmlParserCtxtPtr ctxt)
1874{
1875 const xmlChar *ret;
1876
1877 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001878 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001879 ctxt->nameNr--;
1880 if (ctxt->nameNr > 0)
1881 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1882 else
1883 ctxt->name = NULL;
1884 ret = ctxt->nameTab[ctxt->nameNr];
1885 ctxt->nameTab[ctxt->nameNr] = NULL;
1886 return (ret);
1887}
Daniel Veillarda2351322004-06-27 12:08:10 +00001888#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001889
1890/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001891 * namePush:
1892 * @ctxt: an XML parser context
1893 * @value: the element name
1894 *
1895 * Pushes a new element name on top of the name stack
1896 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001897 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001898 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001899int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001900namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001901{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001902 if (ctxt == NULL) return (-1);
1903
Daniel Veillard1c732d22002-11-30 11:22:59 +00001904 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001905 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001906 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001907 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001908 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001909 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001910 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001911 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001912 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001913 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001914 }
1915 ctxt->nameTab[ctxt->nameNr] = value;
1916 ctxt->name = value;
1917 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001918mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001919 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001920 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001921}
1922/**
1923 * namePop:
1924 * @ctxt: an XML parser context
1925 *
1926 * Pops the top element name from the name stack
1927 *
1928 * Returns the name just removed
1929 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001930const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001931namePop(xmlParserCtxtPtr ctxt)
1932{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001933 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001934
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001935 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1936 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001937 ctxt->nameNr--;
1938 if (ctxt->nameNr > 0)
1939 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1940 else
1941 ctxt->name = NULL;
1942 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001943 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001944 return (ret);
1945}
Owen Taylor3473f882001-02-23 17:55:21 +00001946
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001947static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001948 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001949 int *tmp;
1950
Owen Taylor3473f882001-02-23 17:55:21 +00001951 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001952 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1953 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1954 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001955 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001956 ctxt->spaceMax /=2;
1957 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001958 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001959 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001960 }
1961 ctxt->spaceTab[ctxt->spaceNr] = val;
1962 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1963 return(ctxt->spaceNr++);
1964}
1965
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001966static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001967 int ret;
1968 if (ctxt->spaceNr <= 0) return(0);
1969 ctxt->spaceNr--;
1970 if (ctxt->spaceNr > 0)
1971 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1972 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001973 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001974 ret = ctxt->spaceTab[ctxt->spaceNr];
1975 ctxt->spaceTab[ctxt->spaceNr] = -1;
1976 return(ret);
1977}
1978
1979/*
1980 * Macros for accessing the content. Those should be used only by the parser,
1981 * and not exported.
1982 *
1983 * Dirty macros, i.e. one often need to make assumption on the context to
1984 * use them
1985 *
1986 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1987 * To be used with extreme caution since operations consuming
1988 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1993 * RAW same as CUR but in the input buffer, bypass any token
1994 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00002001 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2002 *
2003 * NEXT Skip to the next character, this does the proper decoding
2004 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00002005 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00002006 * CUR_CHAR(l) returns the current unicode character (int), set l
2007 * to the number of xmlChars used for the encoding [0-5].
2008 * CUR_SCHAR same but operate on a string instead of the context
2009 * COPY_BUF copy the current unicode char to the target buffer, increment
2010 * the index
2011 * GROW, SHRINK handling of input buffers
2012 */
2013
/*
 * Raw accessors on the current input. They all expand to expressions on
 * a parser context named `ctxt`, which must be in scope where they are
 * used.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is non-zero when the first n bytes at s are
 * c1 ... cn. Bytes are compared as unsigned char and the comparison
 * stops at the first mismatch, so a zero terminated s shorter than n
 * is never read past its terminator as long as no ci is 0.
 * Every parameter is parenthesized so that any expression can be passed;
 * s is still evaluated once per byte compared and must be free of side
 * effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2037
/*
 * SKIP(val): advance the current input of `ctxt` by val xmlChars,
 * updating the character and column counters by the same amount (no
 * line accounting is done). Afterwards a '%' at the new position is
 * handed to xmlParserHandlePEReference(), and if the position holds a 0
 * and the input cannot be grown, the input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do { \
    ctxt->nbChars += (val); \
    ctxt->input->cur += (val); \
    ctxt->input->col += (val); \
    if (*ctxt->input->cur == '%') \
        xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
2045
/*
 * SKIPL(val): advance the current input of `ctxt` by val xmlChars one
 * at a time, keeping the line, column and character counters up to date
 * (a '\n' starts a new line at column 1). Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and if the
 * position holds a 0 and the input cannot be grown, the input is popped.
 * val is parenthesized so that any expression can be passed; it is
 * re-evaluated on every iteration and must be free of side effects.
 */
#define SKIPL(val) do { \
    int skipl; \
    for (skipl = 0; skipl < (val); skipl++) { \
        if (*(ctxt->input->cur) == '\n') { \
            ctxt->input->line++; ctxt->input->col = 1; \
        } else ctxt->input->col++; \
        ctxt->nbChars++; \
        ctxt->input->cur++; \
    } \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
    if ((*ctxt->input->cur == 0) && \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
        xmlPopInput(ctxt); \
  } while (0)
2060
Daniel Veillarda880b122003-04-21 21:36:41 +00002061#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002062 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2063 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002064 xmlSHRINK (ctxt);
2065
2066static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2067 xmlParserInputShrink(ctxt->input);
2068 if ((*ctxt->input->cur == 0) &&
2069 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2070 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002071 }
Owen Taylor3473f882001-02-23 17:55:21 +00002072
Daniel Veillarda880b122003-04-21 21:36:41 +00002073#define GROW if ((ctxt->progressive == 0) && \
2074 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002075 xmlGROW (ctxt);
2076
2077static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002078 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2079 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2080
2081 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2082 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002083 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002084 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002086 xmlHaltParser(ctxt);
2087 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002088 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002089 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002090 if ((ctxt->input->cur > ctxt->input->end) ||
2091 (ctxt->input->cur < ctxt->input->base)) {
2092 xmlHaltParser(ctxt);
2093 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2094 return;
2095 }
Daniel Veillard59df7832010-02-02 10:24:01 +01002096 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002097 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2098 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002099}
Owen Taylor3473f882001-02-23 17:55:21 +00002100
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on `ctxt` */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on `ctxt` */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar of the current input of `ctxt`,
 * counting one column and one character (no line accounting is done),
 * then try to read more data if the new position holds a 0.
 * NOTE: this expands to a braced block, not to a do/while statement.
 */
#define NEXT1 { \
        ctxt->nbChars++; \
        ctxt->input->col++; \
        ctxt->input->cur++; \
        if (*ctxt->input->cur == 0) \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
    }
2112
/*
 * NEXTL(l): step over the current character of the input of `ctxt`,
 * which is l xmlChars long. A '\n' starts a new line at column 1, any
 * other character counts for one column. Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do { \
    if (*(ctxt->input->cur) == '\n') { \
        ctxt->input->line++; ctxt->input->col = 1; \
    } else ctxt->input->col++; \
    ctxt->input->cur += (l); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
  } while (0)

/*
 * CUR_CHAR(l) / CUR_SCHAR(s, l): shorthands for xmlCurrentChar() on the
 * input of `ctxt` and xmlStringCurrentChar() on the string s; the
 * address of l, which must be an lvalue, is passed as last argument.
 */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to the buffer b at index i
 * and advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise xmlCopyCharMultiByte() writes it and i moves by its result.
 * i must be an lvalue free of side effects. All parameters are
 * parenthesized so that any expression can be passed for l, b and v.
 * NOTE: this expands to a bare if/else statement; do not use it as the
 * unbraced body of another `if`.
 */
#define COPY_BUF(l,b,i,v) \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v); \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v))
Owen Taylor3473f882001-02-23 17:55:21 +00002127
2128/**
2129 * xmlSkipBlankChars:
2130 * @ctxt: the XML parser context
2131 *
2132 * skip all blanks character found at that point in the input streams.
2133 * It pops up finished entities in the process if allowable at that point.
2134 *
2135 * Returns the number of space chars skipped
2136 */
2137
2138int
2139xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002140 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002141
2142 /*
2143 * It's Okay to use CUR/NEXT here since all the blanks are on
2144 * the ASCII range.
2145 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002146 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2147 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002148 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002149 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002150 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002151 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002152 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002153 if (*cur == '\n') {
2154 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002155 } else {
2156 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002157 }
2158 cur++;
2159 res++;
2160 if (*cur == 0) {
2161 ctxt->input->cur = cur;
2162 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2163 cur = ctxt->input->cur;
2164 }
2165 }
2166 ctxt->input->cur = cur;
2167 } else {
2168 int cur;
2169 do {
2170 cur = CUR;
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002171 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2172 (ctxt->instate != XML_PARSER_EOF))) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002173 NEXT;
2174 cur = CUR;
2175 res++;
2176 }
2177 while ((cur == 0) && (ctxt->inputNr > 1) &&
2178 (ctxt->instate != XML_PARSER_COMMENT)) {
2179 xmlPopInput(ctxt);
2180 cur = CUR;
2181 }
2182 /*
2183 * Need to handle support of entities branching here
2184 */
2185 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002186 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2187 (ctxt->instate != XML_PARSER_EOF));
Daniel Veillard02141ea2001-04-30 11:46:40 +00002188 }
Owen Taylor3473f882001-02-23 17:55:21 +00002189 return(res);
2190}
2191
2192/************************************************************************
2193 * *
2194 * Commodity functions to handle entities *
2195 * *
2196 ************************************************************************/
2197
2198/**
2199 * xmlPopInput:
2200 * @ctxt: an XML parser context
2201 *
2202 * xmlPopInput: the current input pointed by ctxt->input came to an end
2203 * pop it and return the next char.
2204 *
2205 * Returns the current xmlChar in the parser context
2206 */
2207xmlChar
2208xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002209 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002210 if (xmlParserDebugEntities)
2211 xmlGenericError(xmlGenericErrorContext,
2212 "Popping input %d\n", ctxt->inputNr);
2213 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002214 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002215 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2216 return(xmlPopInput(ctxt));
2217 return(CUR);
2218}
2219
2220/**
2221 * xmlPushInput:
2222 * @ctxt: an XML parser context
2223 * @input: an XML parser input fragment (entity, XML fragment ...).
2224 *
2225 * xmlPushInput: switch to a new input stream which is stacked on top
2226 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002227 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002228 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002229int
Owen Taylor3473f882001-02-23 17:55:21 +00002230xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002231 int ret;
2232 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002233
2234 if (xmlParserDebugEntities) {
2235 if ((ctxt->input != NULL) && (ctxt->input->filename))
2236 xmlGenericError(xmlGenericErrorContext,
2237 "%s(%d): ", ctxt->input->filename,
2238 ctxt->input->line);
2239 xmlGenericError(xmlGenericErrorContext,
2240 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2241 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002242 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002243 if (ctxt->instate == XML_PARSER_EOF)
2244 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002245 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002246 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002247}
2248
2249/**
2250 * xmlParseCharRef:
2251 * @ctxt: an XML parser context
2252 *
2253 * parse Reference declarations
2254 *
2255 * [66] CharRef ::= '&#' [0-9]+ ';' |
2256 * '&#x' [0-9a-fA-F]+ ';'
2257 *
2258 * [ WFC: Legal Character ]
2259 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002260 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002261 *
2262 * Returns the value parsed (as an int), 0 in case of error
2263 */
2264int
2265xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002266 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002267 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002268 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002269
Owen Taylor3473f882001-02-23 17:55:21 +00002270 /*
2271 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2272 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002273 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002274 (NXT(2) == 'x')) {
2275 SKIP(3);
2276 GROW;
2277 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002278 if (count++ > 20) {
2279 count = 0;
2280 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002281 if (ctxt->instate == XML_PARSER_EOF)
2282 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002283 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002284 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002285 val = val * 16 + (CUR - '0');
2286 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2287 val = val * 16 + (CUR - 'a') + 10;
2288 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2289 val = val * 16 + (CUR - 'A') + 10;
2290 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002291 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002292 val = 0;
2293 break;
2294 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002295 if (val > 0x10FFFF)
2296 outofrange = val;
2297
Owen Taylor3473f882001-02-23 17:55:21 +00002298 NEXT;
2299 count++;
2300 }
2301 if (RAW == ';') {
2302 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002303 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002304 ctxt->nbChars ++;
2305 ctxt->input->cur++;
2306 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002307 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002308 SKIP(2);
2309 GROW;
2310 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002311 if (count++ > 20) {
2312 count = 0;
2313 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002314 if (ctxt->instate == XML_PARSER_EOF)
2315 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002316 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002317 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002318 val = val * 10 + (CUR - '0');
2319 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002320 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002321 val = 0;
2322 break;
2323 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002324 if (val > 0x10FFFF)
2325 outofrange = val;
2326
Owen Taylor3473f882001-02-23 17:55:21 +00002327 NEXT;
2328 count++;
2329 }
2330 if (RAW == ';') {
2331 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002332 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002333 ctxt->nbChars ++;
2334 ctxt->input->cur++;
2335 }
2336 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002337 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002338 }
2339
2340 /*
2341 * [ WFC: Legal Character ]
2342 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002343 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002344 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002345 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002346 return(val);
2347 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002348 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2349 "xmlParseCharRef: invalid xmlChar value %d\n",
2350 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002351 }
2352 return(0);
2353}
2354
2355/**
2356 * xmlParseStringCharRef:
2357 * @ctxt: an XML parser context
2358 * @str: a pointer to an index in the string
2359 *
2360 * parse Reference declarations, variant parsing from a string rather
2361 * than an an input flow.
2362 *
2363 * [66] CharRef ::= '&#' [0-9]+ ';' |
2364 * '&#x' [0-9a-fA-F]+ ';'
2365 *
2366 * [ WFC: Legal Character ]
2367 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002368 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002369 *
2370 * Returns the value parsed (as an int), 0 in case of error, str will be
2371 * updated to the current value of the index
2372 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002373static int
Owen Taylor3473f882001-02-23 17:55:21 +00002374xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2375 const xmlChar *ptr;
2376 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002377 unsigned int val = 0;
2378 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002379
2380 if ((str == NULL) || (*str == NULL)) return(0);
2381 ptr = *str;
2382 cur = *ptr;
2383 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2384 ptr += 3;
2385 cur = *ptr;
2386 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002387 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002388 val = val * 16 + (cur - '0');
2389 else if ((cur >= 'a') && (cur <= 'f'))
2390 val = val * 16 + (cur - 'a') + 10;
2391 else if ((cur >= 'A') && (cur <= 'F'))
2392 val = val * 16 + (cur - 'A') + 10;
2393 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002394 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002395 val = 0;
2396 break;
2397 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002398 if (val > 0x10FFFF)
2399 outofrange = val;
2400
Owen Taylor3473f882001-02-23 17:55:21 +00002401 ptr++;
2402 cur = *ptr;
2403 }
2404 if (cur == ';')
2405 ptr++;
2406 } else if ((cur == '&') && (ptr[1] == '#')){
2407 ptr += 2;
2408 cur = *ptr;
2409 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002410 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002411 val = val * 10 + (cur - '0');
2412 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002413 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002414 val = 0;
2415 break;
2416 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002417 if (val > 0x10FFFF)
2418 outofrange = val;
2419
Owen Taylor3473f882001-02-23 17:55:21 +00002420 ptr++;
2421 cur = *ptr;
2422 }
2423 if (cur == ';')
2424 ptr++;
2425 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002426 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002427 return(0);
2428 }
2429 *str = ptr;
2430
2431 /*
2432 * [ WFC: Legal Character ]
2433 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002434 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002435 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002436 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002437 return(val);
2438 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002439 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2440 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2441 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002442 }
2443 return(0);
2444}
2445
2446/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002447 * xmlNewBlanksWrapperInputStream:
2448 * @ctxt: an XML parser context
2449 * @entity: an Entity pointer
2450 *
2451 * Create a new input stream for wrapping
2452 * blanks around a PEReference
2453 *
2454 * Returns the new input stream or NULL
2455 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002456
Daniel Veillardf5582f12002-06-11 10:08:16 +00002457static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002458
Daniel Veillardf4862f02002-09-10 11:13:43 +00002459static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002460xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2461 xmlParserInputPtr input;
2462 xmlChar *buffer;
2463 size_t length;
2464 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002465 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2466 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002467 return(NULL);
2468 }
2469 if (xmlParserDebugEntities)
2470 xmlGenericError(xmlGenericErrorContext,
2471 "new blanks wrapper for entity: %s\n", entity->name);
2472 input = xmlNewInputStream(ctxt);
2473 if (input == NULL) {
2474 return(NULL);
2475 }
2476 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002477 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002478 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002479 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002480 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002481 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002482 }
2483 buffer [0] = ' ';
2484 buffer [1] = '%';
2485 buffer [length-3] = ';';
2486 buffer [length-2] = ' ';
2487 buffer [length-1] = 0;
2488 memcpy(buffer + 2, entity->name, length - 5);
2489 input->free = deallocblankswrapper;
2490 input->base = buffer;
2491 input->cur = buffer;
2492 input->length = length;
2493 input->end = &buffer[length];
2494 return(input);
2495}
2496
2497/**
Owen Taylor3473f882001-02-23 17:55:21 +00002498 * xmlParserHandlePEReference:
2499 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002500 *
Owen Taylor3473f882001-02-23 17:55:21 +00002501 * [69] PEReference ::= '%' Name ';'
2502 *
2503 * [ WFC: No Recursion ]
2504 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002505 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002506 *
2507 * [ WFC: Entity Declared ]
2508 * In a document without any DTD, a document with only an internal DTD
2509 * subset which contains no parameter entity references, or a document
2510 * with "standalone='yes'", ... ... The declaration of a parameter
2511 * entity must precede any reference to it...
2512 *
2513 * [ VC: Entity Declared ]
2514 * In a document with an external subset or external parameter entities
2515 * with "standalone='no'", ... ... The declaration of a parameter entity
2516 * must precede any reference to it...
2517 *
2518 * [ WFC: In DTD ]
2519 * Parameter-entity references may only appear in the DTD.
2520 * NOTE: misleading but this is handled.
2521 *
2522 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002523 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002524 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002525 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002526 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002527 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002528 */
2529void
2530xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002531 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 xmlEntityPtr entity = NULL;
2533 xmlParserInputPtr input;
2534
Owen Taylor3473f882001-02-23 17:55:21 +00002535 if (RAW != '%') return;
2536 switch(ctxt->instate) {
2537 case XML_PARSER_CDATA_SECTION:
2538 return;
2539 case XML_PARSER_COMMENT:
2540 return;
2541 case XML_PARSER_START_TAG:
2542 return;
2543 case XML_PARSER_END_TAG:
2544 return;
2545 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002546 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002547 return;
2548 case XML_PARSER_PROLOG:
2549 case XML_PARSER_START:
2550 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002551 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002552 return;
2553 case XML_PARSER_ENTITY_DECL:
2554 case XML_PARSER_CONTENT:
2555 case XML_PARSER_ATTRIBUTE_VALUE:
2556 case XML_PARSER_PI:
2557 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002558 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002559 /* we just ignore it there */
2560 return;
2561 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002562 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002563 return;
2564 case XML_PARSER_ENTITY_VALUE:
2565 /*
2566 * NOTE: in the case of entity values, we don't do the
2567 * substitution here since we need the literal
2568 * entity value to be able to save the internal
2569 * subset of the document.
2570 * This will be handled by xmlStringDecodeEntities
2571 */
2572 return;
2573 case XML_PARSER_DTD:
2574 /*
2575 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2576 * In the internal DTD subset, parameter-entity references
2577 * can occur only where markup declarations can occur, not
2578 * within markup declarations.
2579 * In that case this is handled in xmlParseMarkupDecl
2580 */
2581 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2582 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002583 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002584 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002585 break;
2586 case XML_PARSER_IGNORE:
2587 return;
2588 }
2589
2590 NEXT;
2591 name = xmlParseName(ctxt);
2592 if (xmlParserDebugEntities)
2593 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002594 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002595 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002596 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002597 } else {
2598 if (RAW == ';') {
2599 NEXT;
2600 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2601 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002602 if (ctxt->instate == XML_PARSER_EOF)
2603 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002604 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002605
Owen Taylor3473f882001-02-23 17:55:21 +00002606 /*
2607 * [ WFC: Entity Declared ]
2608 * In a document without any DTD, a document with only an
2609 * internal DTD subset which contains no parameter entity
2610 * references, or a document with "standalone='yes'", ...
2611 * ... The declaration of a parameter entity must precede
2612 * any reference to it...
2613 */
2614 if ((ctxt->standalone == 1) ||
2615 ((ctxt->hasExternalSubset == 0) &&
2616 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002617 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002618 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002619 } else {
2620 /*
2621 * [ VC: Entity Declared ]
2622 * In a document with an external subset or external
2623 * parameter entities with "standalone='no'", ...
2624 * ... The declaration of a parameter entity must precede
2625 * any reference to it...
2626 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002627 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2628 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2629 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002630 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002631 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002632 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2633 "PEReference: %%%s; not found\n",
2634 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002635 ctxt->valid = 0;
2636 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002637 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002638 } else if (ctxt->input->free != deallocblankswrapper) {
2639 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002640 if (xmlPushInput(ctxt, input) < 0)
2641 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002642 } else {
2643 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2644 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002645 xmlChar start[4];
2646 xmlCharEncoding enc;
2647
Owen Taylor3473f882001-02-23 17:55:21 +00002648 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002649 * Note: external parameter entities will not be loaded, it
2650 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002651 * option of validating, or substituting entities were
2652 * given. Doing so is far more secure as the parser will
2653 * only process data coming from the document entity by
2654 * default.
2655 */
2656 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2657 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2658 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002659 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2660 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2661 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002662 (ctxt->validate == 0))
2663 return;
2664
2665 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002666 * handle the extra spaces added before and after
2667 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002668 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002669 */
2670 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002671 if (xmlPushInput(ctxt, input) < 0)
2672 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002673
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002674 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002675 * Get the 4 first bytes and decode the charset
2676 * if enc != XML_CHAR_ENCODING_NONE
2677 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002678 * Note that, since we may have some non-UTF8
2679 * encoding (like UTF16, bug 135229), the 'length'
2680 * is not known, but we can calculate based upon
2681 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002682 */
2683 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002684 if (ctxt->instate == XML_PARSER_EOF)
2685 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002686 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002687 start[0] = RAW;
2688 start[1] = NXT(1);
2689 start[2] = NXT(2);
2690 start[3] = NXT(3);
2691 enc = xmlDetectCharEncoding(start, 4);
2692 if (enc != XML_CHAR_ENCODING_NONE) {
2693 xmlSwitchEncoding(ctxt, enc);
2694 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002695 }
2696
Owen Taylor3473f882001-02-23 17:55:21 +00002697 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002698 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2699 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002700 xmlParseTextDecl(ctxt);
2701 }
Owen Taylor3473f882001-02-23 17:55:21 +00002702 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002703 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2704 "PEReference: %s is not a parameter entity\n",
2705 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002706 }
2707 }
2708 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002709 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002710 }
Owen Taylor3473f882001-02-23 17:55:21 +00002711 }
2712}
2713
/*
 * growBuffer(buffer, n):
 *
 * Macro used to grow the current buffer to twice its size plus @n bytes.
 * buffer##_size is expected to be a size_t variable in scope holding the
 * current allocation size; it is updated on success.
 * mem_error: is expected to be a label in the enclosing function that
 * handles memory allocation failures; it is jumped to when the new size
 * wraps around or when xmlRealloc() fails, in which case @buffer and
 * buffer##_size are left untouched.
 * @n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2728
2729/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002730 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002731 * @ctxt: the parser context
2732 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002733 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002734 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2735 * @end: an end marker xmlChar, 0 if none
2736 * @end2: an end marker xmlChar, 0 if none
2737 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002738 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002739 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002740 *
2741 * [67] Reference ::= EntityRef | CharRef
2742 *
2743 * [69] PEReference ::= '%' Name ';'
2744 *
2745 * Returns A newly allocated string with the substitution done. The caller
2746 * must deallocate it !
2747 */
2748xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002749xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2750 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002751 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002752 size_t buffer_size = 0;
2753 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002754
2755 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002756 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002757 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002758 xmlEntityPtr ent;
2759 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002760
Daniel Veillarda82b1822004-11-08 16:24:57 +00002761 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002762 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002763 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002764
Daniel Veillard0161e632008-08-28 15:36:32 +00002765 if (((ctxt->depth > 40) &&
2766 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2767 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002768 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002769 return(NULL);
2770 }
2771
2772 /*
2773 * allocate a translation buffer.
2774 */
2775 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002776 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002777 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002778
2779 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002780 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002781 * we are operating on already parsed values.
2782 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002783 if (str < last)
2784 c = CUR_SCHAR(str, l);
2785 else
2786 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002787 while ((c != 0) && (c != end) && /* non input consuming loop */
2788 (c != end2) && (c != end3)) {
2789
2790 if (c == 0) break;
2791 if ((c == '&') && (str[1] == '#')) {
2792 int val = xmlParseStringCharRef(ctxt, &str);
2793 if (val != 0) {
2794 COPY_BUF(0,buffer,nbchars,val);
2795 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002796 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002797 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002798 }
Owen Taylor3473f882001-02-23 17:55:21 +00002799 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2800 if (xmlParserDebugEntities)
2801 xmlGenericError(xmlGenericErrorContext,
2802 "String decoding Entity Reference: %.30s\n",
2803 str);
2804 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002805 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2806 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002807 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002808 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002809 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002810 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002811 if ((ent != NULL) &&
2812 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2813 if (ent->content != NULL) {
2814 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002815 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002816 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002817 }
Owen Taylor3473f882001-02-23 17:55:21 +00002818 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002819 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2820 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002821 }
2822 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002823 ctxt->depth++;
2824 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2825 0, 0, 0);
2826 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002827
David Drysdale69030712015-11-20 11:13:45 +08002828 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2829 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2830 goto int_error;
2831
Owen Taylor3473f882001-02-23 17:55:21 +00002832 if (rep != NULL) {
2833 current = rep;
2834 while (*current != 0) { /* non input consuming loop */
2835 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002836 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002837 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002838 goto int_error;
2839 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002840 }
2841 }
2842 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002843 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002844 }
2845 } else if (ent != NULL) {
2846 int i = xmlStrlen(ent->name);
2847 const xmlChar *cur = ent->name;
2848
2849 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002850 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002851 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002852 }
2853 for (;i > 0;i--)
2854 buffer[nbchars++] = *cur++;
2855 buffer[nbchars++] = ';';
2856 }
2857 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2858 if (xmlParserDebugEntities)
2859 xmlGenericError(xmlGenericErrorContext,
2860 "String decoding PE Reference: %.30s\n", str);
2861 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002862 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2863 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002864 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002865 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002866 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002867 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002868 if (ent->content == NULL) {
Daniel Veillardb1d34de2016-03-14 17:19:44 +08002869 /*
2870 * Note: external parsed entities will not be loaded,
2871 * it is not required for a non-validating parser to
2872 * complete external PEreferences coming from the
2873 * internal subset
2874 */
2875 if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2876 ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2877 (ctxt->validate != 0)) {
2878 xmlLoadEntityContent(ctxt, ent);
2879 } else {
2880 xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2881 "not validating will not read content for PE entity %s\n",
2882 ent->name, NULL);
2883 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002884 }
Owen Taylor3473f882001-02-23 17:55:21 +00002885 ctxt->depth++;
2886 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2887 0, 0, 0);
2888 ctxt->depth--;
2889 if (rep != NULL) {
2890 current = rep;
2891 while (*current != 0) { /* non input consuming loop */
2892 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002893 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002894 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002895 goto int_error;
2896 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002897 }
2898 }
2899 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002900 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002901 }
2902 }
2903 } else {
2904 COPY_BUF(l,buffer,nbchars,c);
2905 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002906 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2907 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002908 }
2909 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002910 if (str < last)
2911 c = CUR_SCHAR(str, l);
2912 else
2913 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002914 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002915 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002916 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002917
2918mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002919 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002920int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002921 if (rep != NULL)
2922 xmlFree(rep);
2923 if (buffer != NULL)
2924 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002925 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002926}
2927
Daniel Veillarde57ec792003-09-10 10:50:59 +00002928/**
2929 * xmlStringDecodeEntities:
2930 * @ctxt: the parser context
2931 * @str: the input string
2932 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2933 * @end: an end marker xmlChar, 0 if none
2934 * @end2: an end marker xmlChar, 0 if none
2935 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002936 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002937 * Takes a entity string content and process to do the adequate substitutions.
2938 *
2939 * [67] Reference ::= EntityRef | CharRef
2940 *
2941 * [69] PEReference ::= '%' Name ';'
2942 *
2943 * Returns A newly allocated string with the substitution done. The caller
2944 * must deallocate it !
2945 */
2946xmlChar *
2947xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2948 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002949 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002950 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2951 end, end2, end3));
2952}
Owen Taylor3473f882001-02-23 17:55:21 +00002953
2954/************************************************************************
2955 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002956 * Commodity functions, cleanup needed ? *
2957 * *
2958 ************************************************************************/
2959
2960/**
2961 * areBlanks:
2962 * @ctxt: an XML parser context
2963 * @str: a xmlChar *
2964 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002965 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002966 *
2967 * Is this a sequence of blank chars that one can ignore ?
2968 *
2969 * Returns 1 if ignorable 0 otherwise.
2970 */
2971
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002972static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2973 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002974 int i, ret;
2975 xmlNodePtr lastChild;
2976
Daniel Veillard05c13a22001-09-09 08:38:09 +00002977 /*
2978 * Don't spend time trying to differentiate them, the same callback is
2979 * used !
2980 */
2981 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002982 return(0);
2983
Owen Taylor3473f882001-02-23 17:55:21 +00002984 /*
2985 * Check for xml:space value.
2986 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002987 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2988 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002989 return(0);
2990
2991 /*
2992 * Check that the string is made of blanks
2993 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002994 if (blank_chars == 0) {
2995 for (i = 0;i < len;i++)
2996 if (!(IS_BLANK_CH(str[i]))) return(0);
2997 }
Owen Taylor3473f882001-02-23 17:55:21 +00002998
2999 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003000 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00003001 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00003002 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003003 if (ctxt->myDoc != NULL) {
3004 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3005 if (ret == 0) return(1);
3006 if (ret == 1) return(0);
3007 }
3008
3009 /*
3010 * Otherwise, heuristic :-\
3011 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00003012 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003013 if ((ctxt->node->children == NULL) &&
3014 (RAW == '<') && (NXT(1) == '/')) return(0);
3015
3016 lastChild = xmlGetLastChild(ctxt->node);
3017 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00003018 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3019 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00003020 } else if (xmlNodeIsText(lastChild))
3021 return(0);
3022 else if ((ctxt->node->children != NULL) &&
3023 (xmlNodeIsText(ctxt->node->children)))
3024 return(0);
3025 return(1);
3026}
3027
Owen Taylor3473f882001-02-23 17:55:21 +00003028/************************************************************************
3029 * *
3030 * Extra stuff for namespace support *
3031 * Relates to http://www.w3.org/TR/WD-xml-names *
3032 * *
3033 ************************************************************************/
3034
/**
 * xmlSplitQName:
 * @ctxt:  an XML parser context
 * @name:  the qualified name to split, a zero terminated string
 * @prefix:  a xmlChar **, receives the prefix (or NULL when there is none)
 *
 * parse an UTF8 encoded XML qualified name string
 *
 * [NS 5] QName ::= (Prefix ':')? LocalPart
 *
 * [NS 6] Prefix ::= NCName
 *
 * [NS 7] LocalPart ::= NCName
 *
 * The split happens at the first ':' only. A name starting with ':',
 * a name whose first ':' is its last character and (unless
 * XML_XML_NAMESPACE is defined) a name starting with "xml:" are returned
 * as a plain copy of @name with *@prefix left NULL.
 *
 * Returns the local part, and prefix is updated
 *   to get the Prefix if any. NULL is returned when @prefix or @name is
 *   NULL or when a buffer allocation fails. Both strings are freshly
 *   allocated (xmlStrdup/xmlStrndup/xmlMallocAtomic); presumably the
 *   caller is expected to free them.
 */

xmlChar *
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
    xmlChar buf[XML_MAX_NAMELEN + 5];   /* on-stack scratch for short names */
    xmlChar *buffer = NULL;             /* heap storage once buf is too small */
    int len = 0;
    int max = XML_MAX_NAMELEN;
    xmlChar *ret = NULL;
    const xmlChar *cur = name;
    int c;

    if (prefix == NULL) return(NULL);
    *prefix = NULL;

    if (cur == NULL) return(NULL);

#ifndef XML_XML_NAMESPACE
    /* xml: prefix is not really a namespace */
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
        (cur[2] == 'l') && (cur[3] == ':'))
        return(xmlStrdup(name));
#endif

    /* nasty but well=formed */
    if (cur[0] == ':')
        return(xmlStrdup(name));

    /*
     * First pass: copy everything up to the first ':' (or the end of the
     * string) into the stack buffer.
     */
    c = *cur++;
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
        buf[len++] = c;
        c = *cur++;
    }
    if (len >= max) {
        /*
         * Okay someone managed to make a huge name, so he's ready to pay
         * for the processing speed.
         */
        max = len * 2;

        buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
        if (buffer == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(NULL);
        }
        memcpy(buffer, buf, len);
        while ((c != 0) && (c != ':')) { /* tested bigname.xml */
            /* keep some slack so the terminating 0 always fits */
            if (len + 10 > max) {
                xmlChar *tmp;

                max *= 2;
                tmp = (xmlChar *) xmlRealloc(buffer,
                                             max * sizeof(xmlChar));
                if (tmp == NULL) {
                    xmlFree(buffer);
                    xmlErrMemory(ctxt, NULL);
                    return(NULL);
                }
                buffer = tmp;
            }
            buffer[len++] = c;
            c = *cur++;
        }
        buffer[len] = 0;
    }

    /* a ':' as the very last character: not a prefix, keep the whole name */
    if ((c == ':') && (*cur == 0)) {
        if (buffer != NULL)
            xmlFree(buffer);
        *prefix = NULL;
        return(xmlStrdup(name));
    }

    if (buffer == NULL)
        ret = xmlStrndup(buf, len);
    else {
        /* hand over the heap buffer and reset state for the second pass */
        ret = buffer;
        buffer = NULL;
        max = XML_MAX_NAMELEN;
    }


    if (c == ':') {
        /* what was collected so far is the prefix; now gather the local part */
        c = *cur;
        *prefix = ret;
        if (c == 0) {
            return(xmlStrndup(BAD_CAST "", 0));
        }
        len = 0;

        /*
         * Check that the first character is proper to start
         * a new name
         */
        if (!(((c >= 0x61) && (c <= 0x7A)) ||
              ((c >= 0x41) && (c <= 0x5A)) ||
              (c == '_') || (c == ':'))) {
            int l;
            int first = CUR_SCHAR(cur, l);

            /* the error is reported but the split still goes on */
            if (!IS_LETTER(first) && (first != '_')) {
                xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
                            "Name %s is not XML Namespace compliant\n",
                                  name);
            }
        }
        cur++;

        while ((c != 0) && (len < max)) { /* tested bigname2.xml */
            buf[len++] = c;
            c = *cur++;
        }
        if (len >= max) {
            /*
             * Okay someone managed to make a huge name, so he's ready to pay
             * for the processing speed.
             */
            max = len * 2;

            buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
            if (buffer == NULL) {
                /* NOTE(review): *prefix stays set on this NULL return */
                xmlErrMemory(ctxt, NULL);
                return(NULL);
            }
            memcpy(buffer, buf, len);
            while (c != 0) { /* tested bigname2.xml */
                if (len + 10 > max) {
                    xmlChar *tmp;

                    max *= 2;
                    tmp = (xmlChar *) xmlRealloc(buffer,
                                                 max * sizeof(xmlChar));
                    if (tmp == NULL) {
                        /* NOTE(review): *prefix stays set on this NULL return */
                        xmlErrMemory(ctxt, NULL);
                        xmlFree(buffer);
                        return(NULL);
                    }
                    buffer = tmp;
                }
                buffer[len++] = c;
                c = *cur++;
            }
            buffer[len] = 0;
        }

        if (buffer == NULL)
            ret = xmlStrndup(buf, len);
        else {
            ret = buffer;
        }
    }

    return(ret);
}
3205
3206/************************************************************************
3207 * *
3208 * The parser itself *
3209 * Relates to http://www.w3.org/TR/REC-xml *
3210 * *
3211 ************************************************************************/
3212
Daniel Veillard34e3f642008-07-29 09:02:27 +00003213/************************************************************************
3214 * *
3215 * Routines to parse Name, NCName and NmToken *
3216 * *
3217 ************************************************************************/
/*
 * Per-routine call counters for the name parsing functions; they only
 * exist in builds where DEBUG is defined.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3226
Daniel Veillard34e3f642008-07-29 09:02:27 +00003227/*
3228 * The two following functions are related to the change of accepted
3229 * characters for Name and NmToken in the Revision 5 of XML-1.0
3230 * They correspond to the modified production [4] and the new production [4a]
3231 * changes in that revision. Also note that the macros used for the
3232 * productions Letter, Digit, CombiningChar and Extender are not needed
3233 * anymore.
3234 * We still keep compatibility to pre-revision5 parsing semantic if the
3235 * new XML_PARSE_OLD10 option is given to the parser.
3236 */
3237static int
3238xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3240 /*
3241 * Use the new checks of production [4] [4a] amd [5] of the
3242 * Update 5 of XML-1.0
3243 */
3244 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3245 (((c >= 'a') && (c <= 'z')) ||
3246 ((c >= 'A') && (c <= 'Z')) ||
3247 (c == '_') || (c == ':') ||
3248 ((c >= 0xC0) && (c <= 0xD6)) ||
3249 ((c >= 0xD8) && (c <= 0xF6)) ||
3250 ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 ((c >= 0x370) && (c <= 0x37D)) ||
3252 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 ((c >= 0x200C) && (c <= 0x200D)) ||
3254 ((c >= 0x2070) && (c <= 0x218F)) ||
3255 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 ((c >= 0x10000) && (c <= 0xEFFFF))))
3260 return(1);
3261 } else {
3262 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3263 return(1);
3264 }
3265 return(0);
3266}
3267
3268static int
3269xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3270 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3271 /*
3272 * Use the new checks of production [4] [4a] amd [5] of the
3273 * Update 5 of XML-1.0
3274 */
3275 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3276 (((c >= 'a') && (c <= 'z')) ||
3277 ((c >= 'A') && (c <= 'Z')) ||
3278 ((c >= '0') && (c <= '9')) || /* !start */
3279 (c == '_') || (c == ':') ||
3280 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3281 ((c >= 0xC0) && (c <= 0xD6)) ||
3282 ((c >= 0xD8) && (c <= 0xF6)) ||
3283 ((c >= 0xF8) && (c <= 0x2FF)) ||
3284 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3285 ((c >= 0x370) && (c <= 0x37D)) ||
3286 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3287 ((c >= 0x200C) && (c <= 0x200D)) ||
3288 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3289 ((c >= 0x2070) && (c <= 0x218F)) ||
3290 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3291 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3292 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3293 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3294 ((c >= 0x10000) && (c <= 0xEFFFF))))
3295 return(1);
3296 } else {
3297 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3298 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003299 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003300 (IS_COMBINING(c)) ||
3301 (IS_EXTENDER(c)))
3302 return(1);
3303 }
3304 return(0);
3305}
3306
/*
 * Forward declaration; the definition is not in this part of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003309
Daniel Veillard34e3f642008-07-29 09:02:27 +00003310static const xmlChar *
3311xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3312 int len = 0, l;
3313 int c;
3314 int count = 0;
3315
Daniel Veillardc6561462009-03-25 10:22:31 +00003316#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003317 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003318#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003319
3320 /*
3321 * Handler for more complex cases
3322 */
3323 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003324 if (ctxt->instate == XML_PARSER_EOF)
3325 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003326 c = CUR_CHAR(l);
3327 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3328 /*
3329 * Use the new checks of production [4] [4a] amd [5] of the
3330 * Update 5 of XML-1.0
3331 */
3332 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3333 (!(((c >= 'a') && (c <= 'z')) ||
3334 ((c >= 'A') && (c <= 'Z')) ||
3335 (c == '_') || (c == ':') ||
3336 ((c >= 0xC0) && (c <= 0xD6)) ||
3337 ((c >= 0xD8) && (c <= 0xF6)) ||
3338 ((c >= 0xF8) && (c <= 0x2FF)) ||
3339 ((c >= 0x370) && (c <= 0x37D)) ||
3340 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3341 ((c >= 0x200C) && (c <= 0x200D)) ||
3342 ((c >= 0x2070) && (c <= 0x218F)) ||
3343 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3344 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3345 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3346 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3347 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3348 return(NULL);
3349 }
3350 len += l;
3351 NEXTL(l);
3352 c = CUR_CHAR(l);
3353 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3354 (((c >= 'a') && (c <= 'z')) ||
3355 ((c >= 'A') && (c <= 'Z')) ||
3356 ((c >= '0') && (c <= '9')) || /* !start */
3357 (c == '_') || (c == ':') ||
3358 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3359 ((c >= 0xC0) && (c <= 0xD6)) ||
3360 ((c >= 0xD8) && (c <= 0xF6)) ||
3361 ((c >= 0xF8) && (c <= 0x2FF)) ||
3362 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3363 ((c >= 0x370) && (c <= 0x37D)) ||
3364 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3365 ((c >= 0x200C) && (c <= 0x200D)) ||
3366 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3367 ((c >= 0x2070) && (c <= 0x218F)) ||
3368 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3369 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3370 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3371 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3372 ((c >= 0x10000) && (c <= 0xEFFFF))
3373 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003374 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003375 count = 0;
3376 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003377 if (ctxt->instate == XML_PARSER_EOF)
3378 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003379 }
3380 len += l;
3381 NEXTL(l);
3382 c = CUR_CHAR(l);
3383 }
3384 } else {
3385 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3386 (!IS_LETTER(c) && (c != '_') &&
3387 (c != ':'))) {
3388 return(NULL);
3389 }
3390 len += l;
3391 NEXTL(l);
3392 c = CUR_CHAR(l);
3393
3394 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3395 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3396 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003397 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003398 (IS_COMBINING(c)) ||
3399 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003400 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401 count = 0;
3402 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003403 if (ctxt->instate == XML_PARSER_EOF)
3404 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 }
3406 len += l;
3407 NEXTL(l);
3408 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003409 if (c == 0) {
3410 count = 0;
3411 GROW;
3412 if (ctxt->instate == XML_PARSER_EOF)
3413 return(NULL);
3414 c = CUR_CHAR(l);
3415 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003416 }
3417 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003418 if ((len > XML_MAX_NAME_LENGTH) &&
3419 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3420 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3421 return(NULL);
3422 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003423 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3424 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3425 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3426}
3427
Owen Taylor3473f882001-02-23 17:55:21 +00003428/**
3429 * xmlParseName:
3430 * @ctxt: an XML parser context
3431 *
3432 * parse an XML name.
3433 *
3434 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3435 * CombiningChar | Extender
3436 *
3437 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3438 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003439 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003440 *
3441 * Returns the Name parsed or NULL
3442 */
3443
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003444const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003445xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003446 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003447 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003448 int count = 0;
3449
3450 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003451
Daniel Veillardc6561462009-03-25 10:22:31 +00003452#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003453 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003454#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003455
Daniel Veillard48b2f892001-02-25 16:11:03 +00003456 /*
3457 * Accelerator for simple ASCII names
3458 */
3459 in = ctxt->input->cur;
3460 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3461 ((*in >= 0x41) && (*in <= 0x5A)) ||
3462 (*in == '_') || (*in == ':')) {
3463 in++;
3464 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3465 ((*in >= 0x41) && (*in <= 0x5A)) ||
3466 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003467 (*in == '_') || (*in == '-') ||
3468 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003469 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003470 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003471 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003472 if ((count > XML_MAX_NAME_LENGTH) &&
3473 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3474 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3475 return(NULL);
3476 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003477 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003478 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003479 ctxt->nbChars += count;
3480 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003481 if (ret == NULL)
3482 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003483 return(ret);
3484 }
3485 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003486 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003487 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003488}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003489
/*
 * xmlParseNCNameComplex:
 * @ctxt:  an XML parser context
 *
 * General NCName parser working one decoded character at a time, used
 * by xmlParseNCName() when its ASCII accelerator does not apply. The
 * accepted characters are those of xmlIsNameStartChar() and
 * xmlIsNameChar() with ':' excluded.
 *
 * The start of the name is remembered as an offset from BASE_PTR rather
 * than as a pointer, and the result is rebuilt from BASE_PTR after the
 * GROW calls.
 *
 * Returns the result of xmlDictLookup() for the NCName parsed, or NULL
 *         if the input does not start with an NCName, the parser
 *         reached XML_PARSER_EOF or the name exceeds
 *         XML_MAX_NAME_LENGTH without XML_PARSE_HUGE.
 */
static const xmlChar *
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
    int len = 0, l;
    int c;
    int count = 0;
    size_t startPosition = 0;

#ifdef DEBUG
    nbParseNCNameComplex++;
#endif

    /*
     * Handler for more complex cases
     */
    GROW;
    startPosition = CUR_PTR - BASE_PTR;
    c = CUR_CHAR(l);
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
        (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
        return(NULL);
    }

    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
           (xmlIsNameChar(ctxt, c) && (c != ':'))) {
        /* once per chunk: enforce the length limit and refill the input */
        if (count++ > XML_PARSER_CHUNK_SIZE) {
            if ((len > XML_MAX_NAME_LENGTH) &&
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
                return(NULL);
            }
            count = 0;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
        }
        len += l;
        NEXTL(l);
        c = CUR_CHAR(l);
        if (c == 0) {
            count = 0;
            /*
             * when shrinking to extend the buffer we really need to preserve
             * the part of the name we already parsed. Hence rolling back
             * by current length.
             */
            ctxt->input->cur -= l;
            GROW;
            ctxt->input->cur += l;
            if (ctxt->instate == XML_PARSER_EOF)
                return(NULL);
            c = CUR_CHAR(l);
        }
    }
    if ((len > XML_MAX_NAME_LENGTH) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
        return(NULL);
    }
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
}
3550
3551/**
3552 * xmlParseNCName:
3553 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003554 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003555 *
3556 * parse an XML name.
3557 *
3558 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3559 * CombiningChar | Extender
3560 *
3561 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3562 *
3563 * Returns the Name parsed or NULL
3564 */
3565
3566static const xmlChar *
3567xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003568 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003569 const xmlChar *ret;
3570 int count = 0;
3571
Daniel Veillardc6561462009-03-25 10:22:31 +00003572#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003573 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003574#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003575
3576 /*
3577 * Accelerator for simple ASCII names
3578 */
3579 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003580 e = ctxt->input->end;
3581 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3582 ((*in >= 0x41) && (*in <= 0x5A)) ||
3583 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003584 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003585 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3586 ((*in >= 0x41) && (*in <= 0x5A)) ||
3587 ((*in >= 0x30) && (*in <= 0x39)) ||
3588 (*in == '_') || (*in == '-') ||
3589 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003590 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003591 if (in >= e)
3592 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003593 if ((*in > 0) && (*in < 0x80)) {
3594 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003595 if ((count > XML_MAX_NAME_LENGTH) &&
3596 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3597 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3598 return(NULL);
3599 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003600 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3601 ctxt->input->cur = in;
3602 ctxt->nbChars += count;
3603 ctxt->input->col += count;
3604 if (ret == NULL) {
3605 xmlErrMemory(ctxt, NULL);
3606 }
3607 return(ret);
3608 }
3609 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003610complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003611 return(xmlParseNCNameComplex(ctxt));
3612}
3613
Daniel Veillard46de64e2002-05-29 08:21:33 +00003614/**
3615 * xmlParseNameAndCompare:
3616 * @ctxt: an XML parser context
3617 *
3618 * parse an XML name and compares for match
3619 * (specialized for endtag parsing)
3620 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003621 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3622 * and the name for mismatch
3623 */
3624
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003625static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003626xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003627 register const xmlChar *cmp = other;
3628 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003629 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003630
3631 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003632 if (ctxt->instate == XML_PARSER_EOF)
3633 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003634
Daniel Veillard46de64e2002-05-29 08:21:33 +00003635 in = ctxt->input->cur;
3636 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003637 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003638 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003639 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003640 }
William M. Brack76e95df2003-10-18 16:20:14 +00003641 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003642 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003643 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003644 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003645 }
3646 /* failure (or end of input buffer), check with full function */
3647 ret = xmlParseName (ctxt);
Jan Pokornýbb654fe2016-04-13 16:56:07 +02003648 /* strings coming from the dictionary direct compare possible */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003649 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003650 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003651 }
3652 return ret;
3653}
3654
Owen Taylor3473f882001-02-23 17:55:21 +00003655/**
3656 * xmlParseStringName:
3657 * @ctxt: an XML parser context
3658 * @str: a pointer to the string pointer (IN/OUT)
3659 *
3660 * parse an XML name.
3661 *
3662 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3663 * CombiningChar | Extender
3664 *
3665 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3666 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003667 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003668 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003669 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003670 * is updated to the current location in the string.
3671 */
3672
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003673static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003674xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3675 xmlChar buf[XML_MAX_NAMELEN + 5];
3676 const xmlChar *cur = *str;
3677 int len = 0, l;
3678 int c;
3679
Daniel Veillardc6561462009-03-25 10:22:31 +00003680#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003681 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003682#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003683
Owen Taylor3473f882001-02-23 17:55:21 +00003684 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003685 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003686 return(NULL);
3687 }
3688
Daniel Veillard34e3f642008-07-29 09:02:27 +00003689 COPY_BUF(l,buf,len,c);
3690 cur += l;
3691 c = CUR_SCHAR(cur, l);
3692 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003693 COPY_BUF(l,buf,len,c);
3694 cur += l;
3695 c = CUR_SCHAR(cur, l);
3696 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3697 /*
3698 * Okay someone managed to make a huge name, so he's ready to pay
3699 * for the processing speed.
3700 */
3701 xmlChar *buffer;
3702 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003703
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003704 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003705 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003706 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003707 return(NULL);
3708 }
3709 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003710 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003711 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003712 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003713
3714 if ((len > XML_MAX_NAME_LENGTH) &&
3715 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3716 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3717 xmlFree(buffer);
3718 return(NULL);
3719 }
Owen Taylor3473f882001-02-23 17:55:21 +00003720 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003721 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003722 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003723 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003724 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003725 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003726 return(NULL);
3727 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003728 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003729 }
3730 COPY_BUF(l,buffer,len,c);
3731 cur += l;
3732 c = CUR_SCHAR(cur, l);
3733 }
3734 buffer[len] = 0;
3735 *str = cur;
3736 return(buffer);
3737 }
3738 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003739 if ((len > XML_MAX_NAME_LENGTH) &&
3740 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3741 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3742 return(NULL);
3743 }
Owen Taylor3473f882001-02-23 17:55:21 +00003744 *str = cur;
3745 return(xmlStrndup(buf, len));
3746}
3747
3748/**
3749 * xmlParseNmtoken:
3750 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003751 *
Owen Taylor3473f882001-02-23 17:55:21 +00003752 * parse an XML Nmtoken.
3753 *
3754 * [7] Nmtoken ::= (NameChar)+
3755 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003756 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003757 *
3758 * Returns the Nmtoken parsed or NULL
3759 */
3760
3761xmlChar *
3762xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3763 xmlChar buf[XML_MAX_NAMELEN + 5];
3764 int len = 0, l;
3765 int c;
3766 int count = 0;
3767
Daniel Veillardc6561462009-03-25 10:22:31 +00003768#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003769 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003770#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003771
Owen Taylor3473f882001-02-23 17:55:21 +00003772 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003773 if (ctxt->instate == XML_PARSER_EOF)
3774 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003775 c = CUR_CHAR(l);
3776
Daniel Veillard34e3f642008-07-29 09:02:27 +00003777 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003778 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003779 count = 0;
3780 GROW;
3781 }
3782 COPY_BUF(l,buf,len,c);
3783 NEXTL(l);
3784 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003785 if (c == 0) {
3786 count = 0;
3787 GROW;
3788 if (ctxt->instate == XML_PARSER_EOF)
3789 return(NULL);
3790 c = CUR_CHAR(l);
3791 }
Owen Taylor3473f882001-02-23 17:55:21 +00003792 if (len >= XML_MAX_NAMELEN) {
3793 /*
3794 * Okay someone managed to make a huge token, so he's ready to pay
3795 * for the processing speed.
3796 */
3797 xmlChar *buffer;
3798 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003799
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003800 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003801 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003802 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003803 return(NULL);
3804 }
3805 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003806 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003807 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003808 count = 0;
3809 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003810 if (ctxt->instate == XML_PARSER_EOF) {
3811 xmlFree(buffer);
3812 return(NULL);
3813 }
Owen Taylor3473f882001-02-23 17:55:21 +00003814 }
3815 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003816 xmlChar *tmp;
3817
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003818 if ((max > XML_MAX_NAME_LENGTH) &&
3819 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3820 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3821 xmlFree(buffer);
3822 return(NULL);
3823 }
Owen Taylor3473f882001-02-23 17:55:21 +00003824 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003825 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003826 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003827 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003828 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003829 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003830 return(NULL);
3831 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003832 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003833 }
3834 COPY_BUF(l,buffer,len,c);
3835 NEXTL(l);
3836 c = CUR_CHAR(l);
3837 }
3838 buffer[len] = 0;
3839 return(buffer);
3840 }
3841 }
3842 if (len == 0)
3843 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003844 if ((len > XML_MAX_NAME_LENGTH) &&
3845 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3846 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3847 return(NULL);
3848 }
Owen Taylor3473f882001-02-23 17:55:21 +00003849 return(xmlStrndup(buf, len));
3850}
3851
3852/**
3853 * xmlParseEntityValue:
3854 * @ctxt: an XML parser context
3855 * @orig: if non-NULL store a copy of the original entity value
3856 *
3857 * parse a value for ENTITY declarations
3858 *
3859 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3860 * "'" ([^%&'] | PEReference | Reference)* "'"
3861 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003862 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003863 */
3864
3865xmlChar *
3866xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3867 xmlChar *buf = NULL;
3868 int len = 0;
3869 int size = XML_PARSER_BUFFER_SIZE;
3870 int c, l;
3871 xmlChar stop;
3872 xmlChar *ret = NULL;
3873 const xmlChar *cur = NULL;
3874 xmlParserInputPtr input;
3875
3876 if (RAW == '"') stop = '"';
3877 else if (RAW == '\'') stop = '\'';
3878 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003879 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003880 return(NULL);
3881 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003882 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003883 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003884 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003885 return(NULL);
3886 }
3887
3888 /*
3889 * The content of the entity definition is copied in a buffer.
3890 */
3891
3892 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3893 input = ctxt->input;
3894 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003895 if (ctxt->instate == XML_PARSER_EOF) {
3896 xmlFree(buf);
3897 return(NULL);
3898 }
Owen Taylor3473f882001-02-23 17:55:21 +00003899 NEXT;
3900 c = CUR_CHAR(l);
3901 /*
3902 * NOTE: 4.4.5 Included in Literal
3903 * When a parameter entity reference appears in a literal entity
3904 * value, ... a single or double quote character in the replacement
3905 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003906 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003907 * In practice it means we stop the loop only when back at parsing
3908 * the initial entity and the quote is found
3909 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003910 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3911 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003912 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003913 xmlChar *tmp;
3914
Owen Taylor3473f882001-02-23 17:55:21 +00003915 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003916 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3917 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003918 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003919 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003920 return(NULL);
3921 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003922 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003923 }
3924 COPY_BUF(l,buf,len,c);
3925 NEXTL(l);
3926 /*
3927 * Pop-up of finished entities.
3928 */
3929 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3930 xmlPopInput(ctxt);
3931
3932 GROW;
3933 c = CUR_CHAR(l);
3934 if (c == 0) {
3935 GROW;
3936 c = CUR_CHAR(l);
3937 }
3938 }
3939 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003940 if (ctxt->instate == XML_PARSER_EOF) {
3941 xmlFree(buf);
3942 return(NULL);
3943 }
Owen Taylor3473f882001-02-23 17:55:21 +00003944
3945 /*
3946 * Raise problem w.r.t. '&' and '%' being used in non-entities
3947 * reference constructs. Note Charref will be handled in
3948 * xmlStringDecodeEntities()
3949 */
3950 cur = buf;
3951 while (*cur != 0) { /* non input consuming */
3952 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3953 xmlChar *name;
3954 xmlChar tmp = *cur;
3955
3956 cur++;
3957 name = xmlParseStringName(ctxt, &cur);
3958 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003959 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003960 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003961 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003962 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003963 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3964 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003965 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003966 }
3967 if (name != NULL)
3968 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003969 if (*cur == 0)
3970 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003971 }
3972 cur++;
3973 }
3974
3975 /*
3976 * Then PEReference entities are substituted.
3977 */
3978 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003979 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003980 xmlFree(buf);
3981 } else {
3982 NEXT;
3983 /*
3984 * NOTE: 4.4.7 Bypassed
3985 * When a general entity reference appears in the EntityValue in
3986 * an entity declaration, it is bypassed and left as is.
3987 * so XML_SUBSTITUTE_REF is not set here.
3988 */
Peter Simons8f30bdf2016-04-15 11:56:55 +02003989 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00003990 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3991 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02003992 --ctxt->depth;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003993 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003994 *orig = buf;
3995 else
3996 xmlFree(buf);
3997 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003998
Owen Taylor3473f882001-02-23 17:55:21 +00003999 return(ret);
4000}
4001
4002/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00004003 * xmlParseAttValueComplex:
4004 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00004005 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004006 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00004007 *
4008 * parse a value for an attribute, this is the fallback function
4009 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004010 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00004011 *
4012 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4013 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00004014static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004015xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00004016 xmlChar limit = 0;
4017 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004018 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004019 size_t len = 0;
4020 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004021 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004022 xmlChar *current = NULL;
4023 xmlEntityPtr ent;
4024
Owen Taylor3473f882001-02-23 17:55:21 +00004025 if (NXT(0) == '"') {
4026 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4027 limit = '"';
4028 NEXT;
4029 } else if (NXT(0) == '\'') {
4030 limit = '\'';
4031 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4032 NEXT;
4033 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004034 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004035 return(NULL);
4036 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00004037
Owen Taylor3473f882001-02-23 17:55:21 +00004038 /*
4039 * allocate a translation buffer.
4040 */
4041 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004042 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004043 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00004044
4045 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004046 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00004047 */
4048 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004049 while (((NXT(0) != limit) && /* checked */
4050 (IS_CHAR(c)) && (c != '<')) &&
4051 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08004052 /*
4053 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4054 * special option is given
4055 */
4056 if ((len > XML_MAX_TEXT_LENGTH) &&
4057 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4058 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004059 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08004060 goto mem_error;
4061 }
Owen Taylor3473f882001-02-23 17:55:21 +00004062 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00004063 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00004064 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004065 if (NXT(1) == '#') {
4066 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004067
Owen Taylor3473f882001-02-23 17:55:21 +00004068 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00004069 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004070 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004071 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004072 }
4073 buf[len++] = '&';
4074 } else {
4075 /*
4076 * The reparsing will be done in xmlStringGetNodeList()
4077 * called by the attribute() function in SAX.c
4078 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004079 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004080 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004081 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004082 buf[len++] = '&';
4083 buf[len++] = '#';
4084 buf[len++] = '3';
4085 buf[len++] = '8';
4086 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004087 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004088 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004089 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004090 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004091 }
Owen Taylor3473f882001-02-23 17:55:21 +00004092 len += xmlCopyChar(0, &buf[len], val);
4093 }
4094 } else {
4095 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004096 ctxt->nbentities++;
4097 if (ent != NULL)
4098 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004099 if ((ent != NULL) &&
4100 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004101 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004102 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004103 }
4104 if ((ctxt->replaceEntities == 0) &&
4105 (ent->content[0] == '&')) {
4106 buf[len++] = '&';
4107 buf[len++] = '#';
4108 buf[len++] = '3';
4109 buf[len++] = '8';
4110 buf[len++] = ';';
4111 } else {
4112 buf[len++] = ent->content[0];
4113 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004114 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004115 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004116 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
Peter Simons8f30bdf2016-04-15 11:56:55 +02004117 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004118 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004119 XML_SUBSTITUTE_REF,
4120 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004121 --ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004122 if (rep != NULL) {
4123 current = rep;
4124 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004125 if ((*current == 0xD) || (*current == 0xA) ||
4126 (*current == 0x9)) {
4127 buf[len++] = 0x20;
4128 current++;
4129 } else
4130 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004131 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004132 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004133 }
4134 }
4135 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004136 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004137 }
4138 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004139 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004140 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004141 }
Owen Taylor3473f882001-02-23 17:55:21 +00004142 if (ent->content != NULL)
4143 buf[len++] = ent->content[0];
4144 }
4145 } else if (ent != NULL) {
4146 int i = xmlStrlen(ent->name);
4147 const xmlChar *cur = ent->name;
4148
4149 /*
4150 * This may look absurd but is needed to detect
4151 * entities problems
4152 */
4153 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004154 (ent->content != NULL) && (ent->checked == 0)) {
4155 unsigned long oldnbent = ctxt->nbentities;
4156
Peter Simons8f30bdf2016-04-15 11:56:55 +02004157 ++ctxt->depth;
Owen Taylor3473f882001-02-23 17:55:21 +00004158 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004159 XML_SUBSTITUTE_REF, 0, 0, 0);
Peter Simons8f30bdf2016-04-15 11:56:55 +02004160 --ctxt->depth;
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004161
Daniel Veillardcff25462013-03-11 15:57:55 +08004162 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004163 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004164 if (xmlStrchr(rep, '<'))
4165 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004166 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004167 rep = NULL;
4168 }
Owen Taylor3473f882001-02-23 17:55:21 +00004169 }
4170
4171 /*
4172 * Just output the reference
4173 */
4174 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004175 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004176 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004177 }
4178 for (;i > 0;i--)
4179 buf[len++] = *cur++;
4180 buf[len++] = ';';
4181 }
4182 }
4183 } else {
4184 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004185 if ((len != 0) || (!normalize)) {
4186 if ((!normalize) || (!in_space)) {
4187 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004188 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004189 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004190 }
4191 }
4192 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004193 }
4194 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004195 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004196 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004197 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004198 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004199 }
4200 }
4201 NEXTL(l);
4202 }
4203 GROW;
4204 c = CUR_CHAR(l);
4205 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004206 if (ctxt->instate == XML_PARSER_EOF)
4207 goto error;
4208
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004209 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004210 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004211 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004212 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004213 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004214 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004215 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004216 if ((c != 0) && (!IS_CHAR(c))) {
4217 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4218 "invalid character in attribute value\n");
4219 } else {
4220 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4221 "AttValue: ' expected\n");
4222 }
Owen Taylor3473f882001-02-23 17:55:21 +00004223 } else
4224 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004225
4226 /*
4227 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004228 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004229 */
4230 if (len >= INT_MAX) {
4231 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004232 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004233 goto mem_error;
4234 }
4235
4236 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004237 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004238
4239mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004240 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004241error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004242 if (buf != NULL)
4243 xmlFree(buf);
4244 if (rep != NULL)
4245 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004246 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004247}
4248
4249/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004250 * xmlParseAttValue:
4251 * @ctxt: an XML parser context
4252 *
4253 * parse a value for an attribute
4254 * Note: the parser won't do substitution of entities here, this
4255 * will be handled later in xmlStringGetNodeList
4256 *
4257 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4258 * "'" ([^<&'] | Reference)* "'"
4259 *
4260 * 3.3.3 Attribute-Value Normalization:
4261 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004262 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004263 * - a character reference is processed by appending the referenced
4264 * character to the attribute value
4265 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004266 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004267 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4268 * appending #x20 to the normalized value, except that only a single
4269 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004270 * parsed entity or the literal entity value of an internal parsed entity
4271 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004272 * If the declared value is not CDATA, then the XML processor must further
4273 * process the normalized attribute value by discarding any leading and
4274 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004275 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004276 * All attributes for which no declaration has been read should be treated
4277 * by a non-validating parser as if declared CDATA.
4278 *
4279 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4280 */
4281
4282
4283xmlChar *
4284xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004285 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004286 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004287}
4288
4289/**
Owen Taylor3473f882001-02-23 17:55:21 +00004290 * xmlParseSystemLiteral:
4291 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004292 *
Owen Taylor3473f882001-02-23 17:55:21 +00004293 * parse an XML Literal
4294 *
4295 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4296 *
4297 * Returns the SystemLiteral parsed or NULL
4298 */
4299
4300xmlChar *
4301xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4302 xmlChar *buf = NULL;
4303 int len = 0;
4304 int size = XML_PARSER_BUFFER_SIZE;
4305 int cur, l;
4306 xmlChar stop;
4307 int state = ctxt->instate;
4308 int count = 0;
4309
4310 SHRINK;
4311 if (RAW == '"') {
4312 NEXT;
4313 stop = '"';
4314 } else if (RAW == '\'') {
4315 NEXT;
4316 stop = '\'';
4317 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004318 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004319 return(NULL);
4320 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004321
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004322 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004323 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004324 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 return(NULL);
4326 }
4327 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4328 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004329 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004330 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004331 xmlChar *tmp;
4332
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004333 if ((size > XML_MAX_NAME_LENGTH) &&
4334 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4335 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4336 xmlFree(buf);
4337 ctxt->instate = (xmlParserInputState) state;
4338 return(NULL);
4339 }
Owen Taylor3473f882001-02-23 17:55:21 +00004340 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004341 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4342 if (tmp == NULL) {
4343 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004344 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004345 ctxt->instate = (xmlParserInputState) state;
4346 return(NULL);
4347 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004348 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004349 }
4350 count++;
4351 if (count > 50) {
4352 GROW;
4353 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004354 if (ctxt->instate == XML_PARSER_EOF) {
4355 xmlFree(buf);
4356 return(NULL);
4357 }
Owen Taylor3473f882001-02-23 17:55:21 +00004358 }
4359 COPY_BUF(l,buf,len,cur);
4360 NEXTL(l);
4361 cur = CUR_CHAR(l);
4362 if (cur == 0) {
4363 GROW;
4364 SHRINK;
4365 cur = CUR_CHAR(l);
4366 }
4367 }
4368 buf[len] = 0;
4369 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004370 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004371 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004372 } else {
4373 NEXT;
4374 }
4375 return(buf);
4376}
4377
4378/**
4379 * xmlParsePubidLiteral:
4380 * @ctxt: an XML parser context
4381 *
4382 * parse an XML public literal
4383 *
4384 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4385 *
4386 * Returns the PubidLiteral parsed or NULL.
4387 */
4388
4389xmlChar *
4390xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4391 xmlChar *buf = NULL;
4392 int len = 0;
4393 int size = XML_PARSER_BUFFER_SIZE;
4394 xmlChar cur;
4395 xmlChar stop;
4396 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004397 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004398
4399 SHRINK;
4400 if (RAW == '"') {
4401 NEXT;
4402 stop = '"';
4403 } else if (RAW == '\'') {
4404 NEXT;
4405 stop = '\'';
4406 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004407 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004408 return(NULL);
4409 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004410 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004411 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004412 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004413 return(NULL);
4414 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004415 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004416 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004417 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004418 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004419 xmlChar *tmp;
4420
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004421 if ((size > XML_MAX_NAME_LENGTH) &&
4422 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4423 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4424 xmlFree(buf);
4425 return(NULL);
4426 }
Owen Taylor3473f882001-02-23 17:55:21 +00004427 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004428 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4429 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004431 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004432 return(NULL);
4433 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004434 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004435 }
4436 buf[len++] = cur;
4437 count++;
4438 if (count > 50) {
4439 GROW;
4440 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004441 if (ctxt->instate == XML_PARSER_EOF) {
4442 xmlFree(buf);
4443 return(NULL);
4444 }
Owen Taylor3473f882001-02-23 17:55:21 +00004445 }
4446 NEXT;
4447 cur = CUR;
4448 if (cur == 0) {
4449 GROW;
4450 SHRINK;
4451 cur = CUR;
4452 }
4453 }
4454 buf[len] = 0;
4455 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004456 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004457 } else {
4458 NEXT;
4459 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004460 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004461 return(buf);
4462}
4463
Daniel Veillard8ed10722009-08-20 19:17:36 +02004464static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004465
/*
 * Lookup table driving the inner loop of the character data fast path.
 * An entry is non-zero when the byte can be skipped over directly: TAB
 * (0x09) and the range 0x20..0x7F, except for '&', '<' and ']'. All other
 * bytes, including LF (0x0A), CR (0x0D) and everything above 0x7F, map to
 * zero and stop the loop.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /*
     * 0x80 - 0xFF (non-ASCII): the remaining 128 entries are left to the
     * implicit zero initialization of the array.
     */
};
4503
Owen Taylor3473f882001-02-23 17:55:21 +00004504/**
4505 * xmlParseCharData:
4506 * @ctxt: an XML parser context
4507 * @cdata: int indicating whether we are within a CDATA section
4508 *
4509 * parse a CharData section.
4510 * if we are within a CDATA section ']]>' marks an end of section.
4511 *
4512 * The right angle bracket (>) may be represented using the string "&gt;",
4513 * and must, for compatibility, be escaped using "&gt;" or a character
4514 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004515 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004516 *
4517 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4518 */
4519
4520void
4521xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004522 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004523 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004524 int line = ctxt->input->line;
4525 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004526 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004527
4528 SHRINK;
4529 GROW;
4530 /*
4531 * Accelerated common case where input don't need to be
4532 * modified before passing it to the handler.
4533 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004534 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004535 in = ctxt->input->cur;
4536 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004537get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004538 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004539 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004540 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004541 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004542 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004543 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004544 goto get_more_space;
4545 }
4546 if (*in == '<') {
4547 nbchar = in - ctxt->input->cur;
4548 if (nbchar > 0) {
4549 const xmlChar *tmp = ctxt->input->cur;
4550 ctxt->input->cur = in;
4551
Daniel Veillard34099b42004-11-04 17:34:35 +00004552 if ((ctxt->sax != NULL) &&
4553 (ctxt->sax->ignorableWhitespace !=
4554 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004555 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004556 if (ctxt->sax->ignorableWhitespace != NULL)
4557 ctxt->sax->ignorableWhitespace(ctxt->userData,
4558 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004559 } else {
4560 if (ctxt->sax->characters != NULL)
4561 ctxt->sax->characters(ctxt->userData,
4562 tmp, nbchar);
4563 if (*ctxt->space == -1)
4564 *ctxt->space = -2;
4565 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004566 } else if ((ctxt->sax != NULL) &&
4567 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004568 ctxt->sax->characters(ctxt->userData,
4569 tmp, nbchar);
4570 }
4571 }
4572 return;
4573 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004574
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004575get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004576 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004577 while (test_char_data[*in]) {
4578 in++;
4579 ccol++;
4580 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004581 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004582 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004583 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004584 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004585 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004586 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004587 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004588 }
4589 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004590 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004591 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004592 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004593 return;
4594 }
4595 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004596 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004597 goto get_more;
4598 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004599 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004600 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004601 if ((ctxt->sax != NULL) &&
4602 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004603 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004604 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004605 const xmlChar *tmp = ctxt->input->cur;
4606 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004607
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004608 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004609 if (ctxt->sax->ignorableWhitespace != NULL)
4610 ctxt->sax->ignorableWhitespace(ctxt->userData,
4611 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004612 } else {
4613 if (ctxt->sax->characters != NULL)
4614 ctxt->sax->characters(ctxt->userData,
4615 tmp, nbchar);
4616 if (*ctxt->space == -1)
4617 *ctxt->space = -2;
4618 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004619 line = ctxt->input->line;
4620 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004621 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004622 if (ctxt->sax->characters != NULL)
4623 ctxt->sax->characters(ctxt->userData,
4624 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004625 line = ctxt->input->line;
4626 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004627 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004628 /* something really bad happened in the SAX callback */
4629 if (ctxt->instate != XML_PARSER_CONTENT)
4630 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004631 }
4632 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004633 if (*in == 0xD) {
4634 in++;
4635 if (*in == 0xA) {
4636 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004637 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004638 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004639 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004640 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004641 in--;
4642 }
4643 if (*in == '<') {
4644 return;
4645 }
4646 if (*in == '&') {
4647 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004648 }
4649 SHRINK;
4650 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004651 if (ctxt->instate == XML_PARSER_EOF)
4652 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004653 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004654 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004655 nbchar = 0;
4656 }
Daniel Veillard50582112001-03-26 22:52:16 +00004657 ctxt->input->line = line;
4658 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004659 xmlParseCharDataComplex(ctxt, cdata);
4660}
4661
Daniel Veillard01c13b52002-12-10 15:19:08 +00004662/**
4663 * xmlParseCharDataComplex:
4664 * @ctxt: an XML parser context
4665 * @cdata: int indicating whether we are within a CDATA section
4666 *
4667 * parse a CharData section.this is the fallback function
4668 * of xmlParseCharData() when the parsing requires handling
4669 * of non-ASCII characters.
4670 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004671static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004672xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004673 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4674 int nbchar = 0;
4675 int cur, l;
4676 int count = 0;
4677
4678 SHRINK;
4679 GROW;
4680 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004681 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004682 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004683 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004684 if ((cur == ']') && (NXT(1) == ']') &&
4685 (NXT(2) == '>')) {
4686 if (cdata) break;
4687 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004688 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004689 }
4690 }
4691 COPY_BUF(l,buf,nbchar,cur);
4692 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004693 buf[nbchar] = 0;
4694
Owen Taylor3473f882001-02-23 17:55:21 +00004695 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004696 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004697 */
4698 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004699 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004700 if (ctxt->sax->ignorableWhitespace != NULL)
4701 ctxt->sax->ignorableWhitespace(ctxt->userData,
4702 buf, nbchar);
4703 } else {
4704 if (ctxt->sax->characters != NULL)
4705 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004706 if ((ctxt->sax->characters !=
4707 ctxt->sax->ignorableWhitespace) &&
4708 (*ctxt->space == -1))
4709 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004710 }
4711 }
4712 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004713 /* something really bad happened in the SAX callback */
4714 if (ctxt->instate != XML_PARSER_CONTENT)
4715 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004716 }
4717 count++;
4718 if (count > 50) {
4719 GROW;
4720 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004721 if (ctxt->instate == XML_PARSER_EOF)
4722 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
4724 NEXTL(l);
4725 cur = CUR_CHAR(l);
4726 }
4727 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004728 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004729 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004730 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004731 */
4732 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004733 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004734 if (ctxt->sax->ignorableWhitespace != NULL)
4735 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4736 } else {
4737 if (ctxt->sax->characters != NULL)
4738 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004739 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4740 (*ctxt->space == -1))
4741 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004742 }
4743 }
4744 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004745 if ((cur != 0) && (!IS_CHAR(cur))) {
4746 /* Generate the error and skip the offending character */
4747 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4748 "PCDATA invalid Char value %d\n",
4749 cur);
4750 NEXTL(l);
4751 }
Owen Taylor3473f882001-02-23 17:55:21 +00004752}
4753
4754/**
4755 * xmlParseExternalID:
4756 * @ctxt: an XML parser context
4757 * @publicID: a xmlChar** receiving PubidLiteral
4758 * @strict: indicate whether we should restrict parsing to only
4759 * production [75], see NOTE below
4760 *
4761 * Parse an External ID or a Public ID
4762 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004763 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004764 * 'PUBLIC' S PubidLiteral S SystemLiteral
4765 *
4766 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4767 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4768 *
4769 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4770 *
4771 * Returns the function returns SystemLiteral and in the second
4772 * case publicID receives PubidLiteral, is strict is off
4773 * it is possible to return NULL and have publicID set.
4774 */
4775
4776xmlChar *
4777xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4778 xmlChar *URI = NULL;
4779
4780 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004781
4782 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004783 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004784 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004785 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004786 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4787 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004788 }
4789 SKIP_BLANKS;
4790 URI = xmlParseSystemLiteral(ctxt);
4791 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004792 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004794 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004795 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004796 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004797 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004798 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004799 }
4800 SKIP_BLANKS;
4801 *publicID = xmlParsePubidLiteral(ctxt);
4802 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004803 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004804 }
4805 if (strict) {
4806 /*
4807 * We don't handle [83] so "S SystemLiteral" is required.
4808 */
William M. Brack76e95df2003-10-18 16:20:14 +00004809 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004810 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004811 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004812 }
4813 } else {
4814 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004815 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004816 * "S SystemLiteral" is not detected. From a purely parsing
4817 * point of view that's a nice mess.
4818 */
4819 const xmlChar *ptr;
4820 GROW;
4821
4822 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004823 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004824
William M. Brack76e95df2003-10-18 16:20:14 +00004825 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004826 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4827 }
4828 SKIP_BLANKS;
4829 URI = xmlParseSystemLiteral(ctxt);
4830 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004831 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004832 }
4833 }
4834 return(URI);
4835}
4836
4837/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004838 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004839 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004840 * @buf: the already parsed part of the buffer
4841 * @len: number of bytes filles in the buffer
4842 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004843 *
4844 * Skip an XML (SGML) comment <!-- .... -->
4845 * The spec says that "For compatibility, the string "--" (double-hyphen)
4846 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004847 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004848 *
4849 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4850 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004851static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004852xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4853 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004854 int q, ql;
4855 int r, rl;
4856 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004857 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004858 int inputid;
4859
4860 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004861
Owen Taylor3473f882001-02-23 17:55:21 +00004862 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004863 len = 0;
4864 size = XML_PARSER_BUFFER_SIZE;
4865 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4866 if (buf == NULL) {
4867 xmlErrMemory(ctxt, NULL);
4868 return;
4869 }
Owen Taylor3473f882001-02-23 17:55:21 +00004870 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004871 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004872 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004873 if (q == 0)
4874 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004875 if (!IS_CHAR(q)) {
4876 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4877 "xmlParseComment: invalid xmlChar value %d\n",
4878 q);
4879 xmlFree (buf);
4880 return;
4881 }
Owen Taylor3473f882001-02-23 17:55:21 +00004882 NEXTL(ql);
4883 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004884 if (r == 0)
4885 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004886 if (!IS_CHAR(r)) {
4887 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4888 "xmlParseComment: invalid xmlChar value %d\n",
4889 q);
4890 xmlFree (buf);
4891 return;
4892 }
Owen Taylor3473f882001-02-23 17:55:21 +00004893 NEXTL(rl);
4894 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004895 if (cur == 0)
4896 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004897 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004898 ((cur != '>') ||
4899 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004900 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004901 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004902 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004903 if ((len > XML_MAX_TEXT_LENGTH) &&
4904 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4905 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4906 "Comment too big found", NULL);
4907 xmlFree (buf);
4908 return;
4909 }
Owen Taylor3473f882001-02-23 17:55:21 +00004910 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004911 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004912 size_t new_size;
4913
4914 new_size = size * 2;
4915 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004916 if (new_buf == NULL) {
4917 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004918 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004919 return;
4920 }
William M. Bracka3215c72004-07-31 16:24:01 +00004921 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004922 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004923 }
4924 COPY_BUF(ql,buf,len,q);
4925 q = r;
4926 ql = rl;
4927 r = cur;
4928 rl = l;
4929
4930 count++;
4931 if (count > 50) {
4932 GROW;
4933 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004934 if (ctxt->instate == XML_PARSER_EOF) {
4935 xmlFree(buf);
4936 return;
4937 }
Owen Taylor3473f882001-02-23 17:55:21 +00004938 }
4939 NEXTL(l);
4940 cur = CUR_CHAR(l);
4941 if (cur == 0) {
4942 SHRINK;
4943 GROW;
4944 cur = CUR_CHAR(l);
4945 }
4946 }
4947 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004948 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004949 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004950 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004951 } else if (!IS_CHAR(cur)) {
4952 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4953 "xmlParseComment: invalid xmlChar value %d\n",
4954 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004956 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004957 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4958 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004959 }
4960 NEXT;
4961 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4962 (!ctxt->disableSAX))
4963 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004964 }
Daniel Veillardda629342007-08-01 07:49:06 +00004965 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004966 return;
4967not_terminated:
4968 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4969 "Comment not terminated\n", NULL);
4970 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004971 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004972}
Daniel Veillardda629342007-08-01 07:49:06 +00004973
Daniel Veillard4c778d82005-01-23 17:37:44 +00004974/**
4975 * xmlParseComment:
4976 * @ctxt: an XML parser context
4977 *
4978 * Skip an XML (SGML) comment <!-- .... -->
4979 * The spec says that "For compatibility, the string "--" (double-hyphen)
4980 * must not occur within comments. "
4981 *
4982 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4983 */
4984void
4985xmlParseComment(xmlParserCtxtPtr ctxt) {
4986 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004987 size_t size = XML_PARSER_BUFFER_SIZE;
4988 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004989 xmlParserInputState state;
4990 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004991 size_t nbchar = 0;
4992 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004993 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004994
4995 /*
4996 * Check that there is a comment right here.
4997 */
4998 if ((RAW != '<') || (NXT(1) != '!') ||
4999 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005000 state = ctxt->instate;
5001 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00005002 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005003 SKIP(4);
5004 SHRINK;
5005 GROW;
5006
5007 /*
5008 * Accelerated common case where input don't need to be
5009 * modified before passing it to the handler.
5010 */
5011 in = ctxt->input->cur;
5012 do {
5013 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005014 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005015 ctxt->input->line++; ctxt->input->col = 1;
5016 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005017 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005018 }
5019get_more:
5020 ccol = ctxt->input->col;
5021 while (((*in > '-') && (*in <= 0x7F)) ||
5022 ((*in >= 0x20) && (*in < '-')) ||
5023 (*in == 0x09)) {
5024 in++;
5025 ccol++;
5026 }
5027 ctxt->input->col = ccol;
5028 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005029 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005030 ctxt->input->line++; ctxt->input->col = 1;
5031 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005032 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005033 goto get_more;
5034 }
5035 nbchar = in - ctxt->input->cur;
5036 /*
5037 * save current set of data
5038 */
5039 if (nbchar > 0) {
5040 if ((ctxt->sax != NULL) &&
5041 (ctxt->sax->comment != NULL)) {
5042 if (buf == NULL) {
5043 if ((*in == '-') && (in[1] == '-'))
5044 size = nbchar + 1;
5045 else
5046 size = XML_PARSER_BUFFER_SIZE + nbchar;
5047 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5048 if (buf == NULL) {
5049 xmlErrMemory(ctxt, NULL);
5050 ctxt->instate = state;
5051 return;
5052 }
5053 len = 0;
5054 } else if (len + nbchar + 1 >= size) {
5055 xmlChar *new_buf;
5056 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5057 new_buf = (xmlChar *) xmlRealloc(buf,
5058 size * sizeof(xmlChar));
5059 if (new_buf == NULL) {
5060 xmlFree (buf);
5061 xmlErrMemory(ctxt, NULL);
5062 ctxt->instate = state;
5063 return;
5064 }
5065 buf = new_buf;
5066 }
5067 memcpy(&buf[len], ctxt->input->cur, nbchar);
5068 len += nbchar;
5069 buf[len] = 0;
5070 }
5071 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08005072 if ((len > XML_MAX_TEXT_LENGTH) &&
5073 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5074 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5075 "Comment too big found", NULL);
5076 xmlFree (buf);
5077 return;
5078 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005079 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00005080 if (*in == 0xA) {
5081 in++;
5082 ctxt->input->line++; ctxt->input->col = 1;
5083 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005084 if (*in == 0xD) {
5085 in++;
5086 if (*in == 0xA) {
5087 ctxt->input->cur = in;
5088 in++;
5089 ctxt->input->line++; ctxt->input->col = 1;
5090 continue; /* while */
5091 }
5092 in--;
5093 }
5094 SHRINK;
5095 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005096 if (ctxt->instate == XML_PARSER_EOF) {
5097 xmlFree(buf);
5098 return;
5099 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005100 in = ctxt->input->cur;
5101 if (*in == '-') {
5102 if (in[1] == '-') {
5103 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005104 if (ctxt->input->id != inputid) {
5105 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5106 "comment doesn't start and stop in the same entity\n");
5107 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005108 SKIP(3);
5109 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5110 (!ctxt->disableSAX)) {
5111 if (buf != NULL)
5112 ctxt->sax->comment(ctxt->userData, buf);
5113 else
5114 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5115 }
5116 if (buf != NULL)
5117 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005118 if (ctxt->instate != XML_PARSER_EOF)
5119 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005120 return;
5121 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005122 if (buf != NULL) {
5123 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5124 "Double hyphen within comment: "
5125 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005126 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005127 } else
5128 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5129 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005130 in++;
5131 ctxt->input->col++;
5132 }
5133 in++;
5134 ctxt->input->col++;
5135 goto get_more;
5136 }
5137 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5138 xmlParseCommentComplex(ctxt, buf, len, size);
5139 ctxt->instate = state;
5140 return;
5141}
5142
Owen Taylor3473f882001-02-23 17:55:21 +00005143
5144/**
5145 * xmlParsePITarget:
5146 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005147 *
Owen Taylor3473f882001-02-23 17:55:21 +00005148 * parse the name of a PI
5149 *
5150 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5151 *
5152 * Returns the PITarget name or NULL
5153 */
5154
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005155const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005156xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005157 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005158
5159 name = xmlParseName(ctxt);
5160 if ((name != NULL) &&
5161 ((name[0] == 'x') || (name[0] == 'X')) &&
5162 ((name[1] == 'm') || (name[1] == 'M')) &&
5163 ((name[2] == 'l') || (name[2] == 'L'))) {
5164 int i;
5165 if ((name[0] == 'x') && (name[1] == 'm') &&
5166 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005167 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005168 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005169 return(name);
5170 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005171 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005172 return(name);
5173 }
5174 for (i = 0;;i++) {
5175 if (xmlW3CPIs[i] == NULL) break;
5176 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5177 return(name);
5178 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005179 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5180 "xmlParsePITarget: invalid name prefix 'xml'\n",
5181 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005182 }
Daniel Veillard37334572008-07-31 08:20:02 +00005183 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005184 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005185 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005186 }
Owen Taylor3473f882001-02-23 17:55:21 +00005187 return(name);
5188}
5189
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is added to the local catalogs of the parsing context
 * so that it can be used for resolutions needed further down in the
 * document.
 *
 * Syntax errors raise an XML_WAR_CATALOG_PI warning, except for a missing
 * '=' after the "catalog" keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *href = NULL;
    xmlChar quote;

    /* Pseudo-attribute name. */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign; its absence is not reported. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* Opening quote: either ' or ". */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* Scan up to the matching closing quote. */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    href = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (href != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, href);
        xmlFree(href);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (href != NULL)
        xmlFree(href);
}
#endif
5251
Owen Taylor3473f882001-02-23 17:55:21 +00005252/**
5253 * xmlParsePI:
5254 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005255 *
Owen Taylor3473f882001-02-23 17:55:21 +00005256 * parse an XML Processing Instruction.
5257 *
5258 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5259 *
5260 * The processing is transfered to SAX once parsed.
5261 */
5262
5263void
5264xmlParsePI(xmlParserCtxtPtr ctxt) {
5265 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005266 size_t len = 0;
5267 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005269 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005270 xmlParserInputState state;
5271 int count = 0;
5272
5273 if ((RAW == '<') && (NXT(1) == '?')) {
5274 xmlParserInputPtr input = ctxt->input;
5275 state = ctxt->instate;
5276 ctxt->instate = XML_PARSER_PI;
5277 /*
5278 * this is a Processing Instruction.
5279 */
5280 SKIP(2);
5281 SHRINK;
5282
5283 /*
5284 * Parse the target name and check for special support like
5285 * namespace.
5286 */
5287 target = xmlParsePITarget(ctxt);
5288 if (target != NULL) {
5289 if ((RAW == '?') && (NXT(1) == '>')) {
5290 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005291 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5292 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005293 }
5294 SKIP(2);
5295
5296 /*
5297 * SAX: PI detected.
5298 */
5299 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5300 (ctxt->sax->processingInstruction != NULL))
5301 ctxt->sax->processingInstruction(ctxt->userData,
5302 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005303 if (ctxt->instate != XML_PARSER_EOF)
5304 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005305 return;
5306 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005307 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005308 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005309 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005310 ctxt->instate = state;
5311 return;
5312 }
5313 cur = CUR;
5314 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005315 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5316 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005317 }
5318 SKIP_BLANKS;
5319 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005320 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005321 ((cur != '?') || (NXT(1) != '>'))) {
5322 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005323 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005324 size_t new_size = size * 2;
5325 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005326 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005327 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005328 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005329 ctxt->instate = state;
5330 return;
5331 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005332 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005333 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005334 }
5335 count++;
5336 if (count > 50) {
5337 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005338 if (ctxt->instate == XML_PARSER_EOF) {
5339 xmlFree(buf);
5340 return;
5341 }
Owen Taylor3473f882001-02-23 17:55:21 +00005342 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005343 if ((len > XML_MAX_TEXT_LENGTH) &&
5344 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5345 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5346 "PI %s too big found", target);
5347 xmlFree(buf);
5348 ctxt->instate = state;
5349 return;
5350 }
Owen Taylor3473f882001-02-23 17:55:21 +00005351 }
5352 COPY_BUF(l,buf,len,cur);
5353 NEXTL(l);
5354 cur = CUR_CHAR(l);
5355 if (cur == 0) {
5356 SHRINK;
5357 GROW;
5358 cur = CUR_CHAR(l);
5359 }
5360 }
Daniel Veillard51304812012-07-19 20:34:26 +08005361 if ((len > XML_MAX_TEXT_LENGTH) &&
5362 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5363 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5364 "PI %s too big found", target);
5365 xmlFree(buf);
5366 ctxt->instate = state;
5367 return;
5368 }
Owen Taylor3473f882001-02-23 17:55:21 +00005369 buf[len] = 0;
5370 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005371 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5372 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005373 } else {
5374 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5376 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005377 }
5378 SKIP(2);
5379
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005380#ifdef LIBXML_CATALOG_ENABLED
5381 if (((state == XML_PARSER_MISC) ||
5382 (state == XML_PARSER_START)) &&
5383 (xmlStrEqual(target, XML_CATALOG_PI))) {
5384 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5385 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5386 (allow == XML_CATA_ALLOW_ALL))
5387 xmlParseCatalogPI(ctxt, buf);
5388 }
5389#endif
5390
5391
Owen Taylor3473f882001-02-23 17:55:21 +00005392 /*
5393 * SAX: PI detected.
5394 */
5395 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5396 (ctxt->sax->processingInstruction != NULL))
5397 ctxt->sax->processingInstruction(ctxt->userData,
5398 target, buf);
5399 }
5400 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005401 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005402 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005403 }
Chris Evans77404b82011-12-14 16:18:25 +08005404 if (ctxt->instate != XML_PARSER_EOF)
5405 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005406 }
5407}
5408
5409/**
5410 * xmlParseNotationDecl:
5411 * @ctxt: an XML parser context
5412 *
5413 * parse a notation declaration
5414 *
5415 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5416 *
5417 * Hence there is actually 3 choices:
5418 * 'PUBLIC' S PubidLiteral
5419 * 'PUBLIC' S PubidLiteral S SystemLiteral
5420 * and 'SYSTEM' S SystemLiteral
5421 *
5422 * See the NOTE on xmlParseExternalID().
5423 */
5424
5425void
5426xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005427 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005428 xmlChar *Pubid;
5429 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005430
Daniel Veillarda07050d2003-10-19 14:46:32 +00005431 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005432 xmlParserInputPtr input = ctxt->input;
5433 SHRINK;
5434 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005435 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5437 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005438 return;
5439 }
5440 SKIP_BLANKS;
5441
Daniel Veillard76d66f42001-05-16 21:05:17 +00005442 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005443 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005444 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005445 return;
5446 }
William M. Brack76e95df2003-10-18 16:20:14 +00005447 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005449 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005450 return;
5451 }
Daniel Veillard37334572008-07-31 08:20:02 +00005452 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005453 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005454 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005455 name, NULL, NULL);
5456 }
Owen Taylor3473f882001-02-23 17:55:21 +00005457 SKIP_BLANKS;
5458
5459 /*
5460 * Parse the IDs.
5461 */
5462 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5463 SKIP_BLANKS;
5464
5465 if (RAW == '>') {
5466 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005469 }
5470 NEXT;
5471 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5472 (ctxt->sax->notationDecl != NULL))
5473 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5474 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005475 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005476 }
Owen Taylor3473f882001-02-23 17:55:21 +00005477 if (Systemid != NULL) xmlFree(Systemid);
5478 if (Pubid != NULL) xmlFree(Pubid);
5479 }
5480}
5481
5482/**
5483 * xmlParseEntityDecl:
5484 * @ctxt: an XML parser context
5485 *
5486 * parse <!ENTITY declarations
5487 *
5488 * [70] EntityDecl ::= GEDecl | PEDecl
5489 *
5490 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5491 *
5492 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5493 *
5494 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5495 *
5496 * [74] PEDef ::= EntityValue | ExternalID
5497 *
5498 * [76] NDataDecl ::= S 'NDATA' S Name
5499 *
5500 * [ VC: Notation Declared ]
5501 * The Name must match the declared name of a notation.
5502 */
5503
5504void
5505xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005506 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005507 xmlChar *value = NULL;
5508 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005509 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005510 int isParameter = 0;
5511 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005512 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005513
Daniel Veillard4c778d82005-01-23 17:37:44 +00005514 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005515 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005516 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005517 SHRINK;
5518 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005519 skipped = SKIP_BLANKS;
5520 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005521 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5522 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005523 }
Owen Taylor3473f882001-02-23 17:55:21 +00005524
5525 if (RAW == '%') {
5526 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005527 skipped = SKIP_BLANKS;
5528 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
David Kilzer4472c3a2016-05-13 15:13:17 +08005530 "Space required after '%%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005531 }
Owen Taylor3473f882001-02-23 17:55:21 +00005532 isParameter = 1;
5533 }
5534
Daniel Veillard76d66f42001-05-16 21:05:17 +00005535 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005536 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005537 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5538 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005539 return;
5540 }
Daniel Veillard37334572008-07-31 08:20:02 +00005541 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005542 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005543 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005544 name, NULL, NULL);
5545 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005546 skipped = SKIP_BLANKS;
5547 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5549 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005550 }
Owen Taylor3473f882001-02-23 17:55:21 +00005551
Daniel Veillardf5582f12002-06-11 10:08:16 +00005552 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005553 /*
5554 * handle the various case of definitions...
5555 */
5556 if (isParameter) {
5557 if ((RAW == '"') || (RAW == '\'')) {
5558 value = xmlParseEntityValue(ctxt, &orig);
5559 if (value) {
5560 if ((ctxt->sax != NULL) &&
5561 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5562 ctxt->sax->entityDecl(ctxt->userData, name,
5563 XML_INTERNAL_PARAMETER_ENTITY,
5564 NULL, NULL, value);
5565 }
5566 } else {
5567 URI = xmlParseExternalID(ctxt, &literal, 1);
5568 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005569 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 }
5571 if (URI) {
5572 xmlURIPtr uri;
5573
5574 uri = xmlParseURI((const char *) URI);
5575 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005576 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5577 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005578 /*
5579 * This really ought to be a well formedness error
5580 * but the XML Core WG decided otherwise c.f. issue
5581 * E26 of the XML erratas.
5582 */
Owen Taylor3473f882001-02-23 17:55:21 +00005583 } else {
5584 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005585 /*
5586 * Okay this is foolish to block those but not
5587 * invalid URIs.
5588 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005589 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005590 } else {
5591 if ((ctxt->sax != NULL) &&
5592 (!ctxt->disableSAX) &&
5593 (ctxt->sax->entityDecl != NULL))
5594 ctxt->sax->entityDecl(ctxt->userData, name,
5595 XML_EXTERNAL_PARAMETER_ENTITY,
5596 literal, URI, NULL);
5597 }
5598 xmlFreeURI(uri);
5599 }
5600 }
5601 }
5602 } else {
5603 if ((RAW == '"') || (RAW == '\'')) {
5604 value = xmlParseEntityValue(ctxt, &orig);
5605 if ((ctxt->sax != NULL) &&
5606 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5607 ctxt->sax->entityDecl(ctxt->userData, name,
5608 XML_INTERNAL_GENERAL_ENTITY,
5609 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005610 /*
5611 * For expat compatibility in SAX mode.
5612 */
5613 if ((ctxt->myDoc == NULL) ||
5614 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5615 if (ctxt->myDoc == NULL) {
5616 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005617 if (ctxt->myDoc == NULL) {
5618 xmlErrMemory(ctxt, "New Doc failed");
5619 return;
5620 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005621 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005622 }
5623 if (ctxt->myDoc->intSubset == NULL)
5624 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5625 BAD_CAST "fake", NULL, NULL);
5626
Daniel Veillard1af9a412003-08-20 22:54:39 +00005627 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5628 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005629 }
Owen Taylor3473f882001-02-23 17:55:21 +00005630 } else {
5631 URI = xmlParseExternalID(ctxt, &literal, 1);
5632 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005633 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005634 }
5635 if (URI) {
5636 xmlURIPtr uri;
5637
5638 uri = xmlParseURI((const char *)URI);
5639 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005640 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5641 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005642 /*
5643 * This really ought to be a well formedness error
5644 * but the XML Core WG decided otherwise c.f. issue
5645 * E26 of the XML erratas.
5646 */
Owen Taylor3473f882001-02-23 17:55:21 +00005647 } else {
5648 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005649 /*
5650 * Okay this is foolish to block those but not
5651 * invalid URIs.
5652 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005653 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005654 }
5655 xmlFreeURI(uri);
5656 }
5657 }
William M. Brack76e95df2003-10-18 16:20:14 +00005658 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005659 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5660 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005661 }
5662 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005663 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005664 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005665 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5667 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005670 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005671 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5672 (ctxt->sax->unparsedEntityDecl != NULL))
5673 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5674 literal, URI, ndata);
5675 } else {
5676 if ((ctxt->sax != NULL) &&
5677 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5678 ctxt->sax->entityDecl(ctxt->userData, name,
5679 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5680 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005681 /*
5682 * For expat compatibility in SAX mode.
5683 * assuming the entity repalcement was asked for
5684 */
5685 if ((ctxt->replaceEntities != 0) &&
5686 ((ctxt->myDoc == NULL) ||
5687 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5688 if (ctxt->myDoc == NULL) {
5689 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005690 if (ctxt->myDoc == NULL) {
5691 xmlErrMemory(ctxt, "New Doc failed");
5692 return;
5693 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005694 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005695 }
5696
5697 if (ctxt->myDoc->intSubset == NULL)
5698 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5699 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005700 xmlSAX2EntityDecl(ctxt, name,
5701 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5702 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005703 }
Owen Taylor3473f882001-02-23 17:55:21 +00005704 }
5705 }
5706 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005707 if (ctxt->instate == XML_PARSER_EOF)
5708 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005709 SKIP_BLANKS;
5710 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005711 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005712 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005713 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005714 } else {
5715 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005716 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5717 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005718 }
5719 NEXT;
5720 }
5721 if (orig != NULL) {
5722 /*
5723 * Ugly mechanism to save the raw entity value.
5724 */
5725 xmlEntityPtr cur = NULL;
5726
5727 if (isParameter) {
5728 if ((ctxt->sax != NULL) &&
5729 (ctxt->sax->getParameterEntity != NULL))
5730 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5731 } else {
5732 if ((ctxt->sax != NULL) &&
5733 (ctxt->sax->getEntity != NULL))
5734 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005735 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005736 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005737 }
Owen Taylor3473f882001-02-23 17:55:21 +00005738 }
5739 if (cur != NULL) {
5740 if (cur->orig != NULL)
5741 xmlFree(orig);
5742 else
5743 cur->orig = orig;
5744 } else
5745 xmlFree(orig);
5746 }
Owen Taylor3473f882001-02-23 17:55:21 +00005747 if (value != NULL) xmlFree(value);
5748 if (URI != NULL) xmlFree(URI);
5749 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
5751}
5752
5753/**
5754 * xmlParseDefaultDecl:
5755 * @ctxt: an XML parser context
5756 * @value: Receive a possible fixed default value for the attribute
5757 *
5758 * Parse an attribute default declaration
5759 *
5760 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5761 *
5762 * [ VC: Required Attribute ]
5763 * if the default declaration is the keyword #REQUIRED, then the
5764 * attribute must be specified for all elements of the type in the
5765 * attribute-list declaration.
5766 *
5767 * [ VC: Attribute Default Legal ]
5768 * The declared default value must meet the lexical constraints of
5769 * the declared attribute type c.f. xmlValidateAttributeDecl()
5770 *
5771 * [ VC: Fixed Attribute Default ]
5772 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005773 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005774 *
5775 * [ WFC: No < in Attribute Values ]
5776 * handled in xmlParseAttValue()
5777 *
5778 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005779 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005780 */
5781
5782int
5783xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5784 int val;
5785 xmlChar *ret;
5786
5787 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005788 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005789 SKIP(9);
5790 return(XML_ATTRIBUTE_REQUIRED);
5791 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005792 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005793 SKIP(8);
5794 return(XML_ATTRIBUTE_IMPLIED);
5795 }
5796 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005797 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005798 SKIP(6);
5799 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005800 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005801 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5802 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005803 }
5804 SKIP_BLANKS;
5805 }
5806 ret = xmlParseAttValue(ctxt);
5807 ctxt->instate = XML_PARSER_DTD;
5808 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005809 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005810 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005811 } else
5812 *value = ret;
5813 return(val);
5814}
5815
5816/**
5817 * xmlParseNotationType:
5818 * @ctxt: an XML parser context
5819 *
5820 * parse an Notation attribute type.
5821 *
5822 * Note: the leading 'NOTATION' S part has already being parsed...
5823 *
5824 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5825 *
5826 * [ VC: Notation Attributes ]
5827 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005828 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005829 *
5830 * Returns: the notation attribute tree built while parsing
5831 */
5832
5833xmlEnumerationPtr
5834xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005835 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005836 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005837
5838 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005839 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005840 return(NULL);
5841 }
5842 SHRINK;
5843 do {
5844 NEXT;
5845 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005846 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005847 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005848 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5849 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005850 xmlFreeEnumeration(ret);
5851 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005852 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005853 tmp = ret;
5854 while (tmp != NULL) {
5855 if (xmlStrEqual(name, tmp->name)) {
5856 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5857 "standalone: attribute notation value token %s duplicated\n",
5858 name, NULL);
5859 if (!xmlDictOwns(ctxt->dict, name))
5860 xmlFree((xmlChar *) name);
5861 break;
5862 }
5863 tmp = tmp->next;
5864 }
5865 if (tmp == NULL) {
5866 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005867 if (cur == NULL) {
5868 xmlFreeEnumeration(ret);
5869 return(NULL);
5870 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005871 if (last == NULL) ret = last = cur;
5872 else {
5873 last->next = cur;
5874 last = cur;
5875 }
Owen Taylor3473f882001-02-23 17:55:21 +00005876 }
5877 SKIP_BLANKS;
5878 } while (RAW == '|');
5879 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005880 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005881 xmlFreeEnumeration(ret);
5882 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005883 }
5884 NEXT;
5885 return(ret);
5886}
5887
5888/**
5889 * xmlParseEnumerationType:
5890 * @ctxt: an XML parser context
5891 *
5892 * parse an Enumeration attribute type.
5893 *
5894 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5895 *
5896 * [ VC: Enumeration ]
5897 * Values of this type must match one of the Nmtoken tokens in
5898 * the declaration
5899 *
5900 * Returns: the enumeration attribute tree built while parsing
5901 */
5902
5903xmlEnumerationPtr
5904xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5905 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005906 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005907
5908 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005909 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005910 return(NULL);
5911 }
5912 SHRINK;
5913 do {
5914 NEXT;
5915 SKIP_BLANKS;
5916 name = xmlParseNmtoken(ctxt);
5917 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005918 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005919 return(ret);
5920 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005921 tmp = ret;
5922 while (tmp != NULL) {
5923 if (xmlStrEqual(name, tmp->name)) {
5924 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5925 "standalone: attribute enumeration value token %s duplicated\n",
5926 name, NULL);
5927 if (!xmlDictOwns(ctxt->dict, name))
5928 xmlFree(name);
5929 break;
5930 }
5931 tmp = tmp->next;
5932 }
5933 if (tmp == NULL) {
5934 cur = xmlCreateEnumeration(name);
5935 if (!xmlDictOwns(ctxt->dict, name))
5936 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005937 if (cur == NULL) {
5938 xmlFreeEnumeration(ret);
5939 return(NULL);
5940 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005941 if (last == NULL) ret = last = cur;
5942 else {
5943 last->next = cur;
5944 last = cur;
5945 }
Owen Taylor3473f882001-02-23 17:55:21 +00005946 }
5947 SKIP_BLANKS;
5948 } while (RAW == '|');
5949 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005950 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005951 return(ret);
5952 }
5953 NEXT;
5954 return(ret);
5955}
5956
5957/**
5958 * xmlParseEnumeratedType:
5959 * @ctxt: an XML parser context
5960 * @tree: the enumeration tree built while parsing
5961 *
5962 * parse an Enumerated attribute type.
5963 *
5964 * [57] EnumeratedType ::= NotationType | Enumeration
5965 *
5966 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5967 *
5968 *
5969 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5970 */
5971
5972int
5973xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005974 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005975 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005976 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005977 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5978 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005979 return(0);
5980 }
5981 SKIP_BLANKS;
5982 *tree = xmlParseNotationType(ctxt);
5983 if (*tree == NULL) return(0);
5984 return(XML_ATTRIBUTE_NOTATION);
5985 }
5986 *tree = xmlParseEnumerationType(ctxt);
5987 if (*tree == NULL) return(0);
5988 return(XML_ATTRIBUTE_ENUMERATION);
5989}
5990
5991/**
5992 * xmlParseAttributeType:
5993 * @ctxt: an XML parser context
5994 * @tree: the enumeration tree built while parsing
5995 *
5996 * parse the Attribute list def for an element
5997 *
5998 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5999 *
6000 * [55] StringType ::= 'CDATA'
6001 *
6002 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6003 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6004 *
6005 * Validity constraints for attribute values syntax are checked in
6006 * xmlValidateAttributeValue()
6007 *
6008 * [ VC: ID ]
6009 * Values of type ID must match the Name production. A name must not
6010 * appear more than once in an XML document as a value of this type;
6011 * i.e., ID values must uniquely identify the elements which bear them.
6012 *
6013 * [ VC: One ID per Element Type ]
6014 * No element type may have more than one ID attribute specified.
6015 *
6016 * [ VC: ID Attribute Default ]
6017 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6018 *
6019 * [ VC: IDREF ]
6020 * Values of type IDREF must match the Name production, and values
6021 * of type IDREFS must match Names; each IDREF Name must match the value
6022 * of an ID attribute on some element in the XML document; i.e. IDREF
6023 * values must match the value of some ID attribute.
6024 *
6025 * [ VC: Entity Name ]
6026 * Values of type ENTITY must match the Name production, values
6027 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006028 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00006029 *
6030 * [ VC: Name Token ]
6031 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006032 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00006033 *
6034 * Returns the attribute type
6035 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006036int
Owen Taylor3473f882001-02-23 17:55:21 +00006037xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6038 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006039 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006040 SKIP(5);
6041 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006042 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006043 SKIP(6);
6044 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006045 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006046 SKIP(5);
6047 return(XML_ATTRIBUTE_IDREF);
6048 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6049 SKIP(2);
6050 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006051 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006052 SKIP(6);
6053 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006054 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006055 SKIP(8);
6056 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006057 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006058 SKIP(8);
6059 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006060 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006061 SKIP(7);
6062 return(XML_ATTRIBUTE_NMTOKEN);
6063 }
6064 return(xmlParseEnumeratedType(ctxt, tree));
6065}
6066
6067/**
6068 * xmlParseAttributeListDecl:
6069 * @ctxt: an XML parser context
6070 *
6071 * : parse the Attribute list def for an element
6072 *
6073 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6074 *
6075 * [53] AttDef ::= S Name S AttType S DefaultDecl
6076 *
6077 */
6078void
6079xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006080 const xmlChar *elemName;
6081 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00006082 xmlEnumerationPtr tree;
6083
Daniel Veillarda07050d2003-10-19 14:46:32 +00006084 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006085 xmlParserInputPtr input = ctxt->input;
6086
6087 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006088 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006089 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006090 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006091 }
6092 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006093 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006094 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006095 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6096 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006097 return;
6098 }
6099 SKIP_BLANKS;
6100 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006101 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006102 const xmlChar *check = CUR_PTR;
6103 int type;
6104 int def;
6105 xmlChar *defaultValue = NULL;
6106
6107 GROW;
6108 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006109 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006111 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6112 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006113 break;
6114 }
6115 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006116 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006117 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006118 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006119 break;
6120 }
6121 SKIP_BLANKS;
6122
6123 type = xmlParseAttributeType(ctxt, &tree);
6124 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006125 break;
6126 }
6127
6128 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006129 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006130 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6131 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006132 if (tree != NULL)
6133 xmlFreeEnumeration(tree);
6134 break;
6135 }
6136 SKIP_BLANKS;
6137
6138 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6139 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006140 if (defaultValue != NULL)
6141 xmlFree(defaultValue);
6142 if (tree != NULL)
6143 xmlFreeEnumeration(tree);
6144 break;
6145 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006146 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6147 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006148
6149 GROW;
6150 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006151 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006152 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006153 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006154 if (defaultValue != NULL)
6155 xmlFree(defaultValue);
6156 if (tree != NULL)
6157 xmlFreeEnumeration(tree);
6158 break;
6159 }
6160 SKIP_BLANKS;
6161 }
6162 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006163 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6164 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006165 if (defaultValue != NULL)
6166 xmlFree(defaultValue);
6167 if (tree != NULL)
6168 xmlFreeEnumeration(tree);
6169 break;
6170 }
6171 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6172 (ctxt->sax->attributeDecl != NULL))
6173 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6174 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006175 else if (tree != NULL)
6176 xmlFreeEnumeration(tree);
6177
6178 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006179 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006180 (def != XML_ATTRIBUTE_REQUIRED)) {
6181 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6182 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006183 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006184 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6185 }
Owen Taylor3473f882001-02-23 17:55:21 +00006186 if (defaultValue != NULL)
6187 xmlFree(defaultValue);
6188 GROW;
6189 }
6190 if (RAW == '>') {
6191 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006192 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6193 "Attribute list declaration doesn't start and stop in the same entity\n",
6194 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006195 }
6196 NEXT;
6197 }
Owen Taylor3473f882001-02-23 17:55:21 +00006198 }
6199}
6200
6201/**
6202 * xmlParseElementMixedContentDecl:
6203 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006204 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006205 *
6206 * parse the declaration for a Mixed Element content
6207 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006208 *
Owen Taylor3473f882001-02-23 17:55:21 +00006209 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6210 * '(' S? '#PCDATA' S? ')'
6211 *
6212 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6213 *
6214 * [ VC: No Duplicate Types ]
6215 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006216 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006217 *
6218 * returns: the list of the xmlElementContentPtr describing the element choices
6219 */
6220xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006221xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006222 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006223 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006224
6225 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006226 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006227 SKIP(7);
6228 SKIP_BLANKS;
6229 SHRINK;
6230 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006231 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006232 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6233"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006234 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006235 }
Owen Taylor3473f882001-02-23 17:55:21 +00006236 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006237 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006238 if (ret == NULL)
6239 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006240 if (RAW == '*') {
6241 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6242 NEXT;
6243 }
6244 return(ret);
6245 }
6246 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006247 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006248 if (ret == NULL) return(NULL);
6249 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006250 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006251 NEXT;
6252 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006253 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006254 if (ret == NULL) return(NULL);
6255 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006256 if (cur != NULL)
6257 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006258 cur = ret;
6259 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006260 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006261 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006262 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006263 if (n->c1 != NULL)
6264 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006265 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006266 if (n != NULL)
6267 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006268 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006269 }
6270 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006271 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006272 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006273 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006274 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006275 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006276 return(NULL);
6277 }
6278 SKIP_BLANKS;
6279 GROW;
6280 }
6281 if ((RAW == ')') && (NXT(1) == '*')) {
6282 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006283 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006284 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006285 if (cur->c2 != NULL)
6286 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006287 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006288 if (ret != NULL)
6289 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006290 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006291 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6292"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006293 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006294 }
Owen Taylor3473f882001-02-23 17:55:21 +00006295 SKIP(2);
6296 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006297 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006298 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006299 return(NULL);
6300 }
6301
6302 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006303 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006304 }
6305 return(ret);
6306}
6307
6308/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006309 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006310 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006311 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006312 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006313 *
 * parse the declaration for an Element children content model
6315 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006316 *
Owen Taylor3473f882001-02-23 17:55:21 +00006317 *
6318 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6319 *
6320 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6321 *
6322 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6323 *
6324 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6325 *
6326 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6327 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006328 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006329 * opening or closing parentheses in a choice, seq, or Mixed
6330 * construct is contained in the replacement text for a parameter
6331 * entity, both must be contained in the same replacement text. For
6332 * interoperability, if a parameter-entity reference appears in a
6333 * choice, seq, or Mixed construct, its replacement text should not
6334 * be empty, and neither the first nor last non-blank character of
6335 * the replacement text should be a connector (| or ,).
6336 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006337 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006338 * hierarchy.
6339 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006340static xmlElementContentPtr
6341xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6342 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006343 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006344 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006345 xmlChar type = 0;
6346
Daniel Veillard489f9672009-08-10 16:49:30 +02006347 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6348 (depth > 2048)) {
6349 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6350"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6351 depth);
6352 return(NULL);
6353 }
Owen Taylor3473f882001-02-23 17:55:21 +00006354 SKIP_BLANKS;
6355 GROW;
6356 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006357 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006358
Owen Taylor3473f882001-02-23 17:55:21 +00006359 /* Recurse on first child */
6360 NEXT;
6361 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006362 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6363 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006364 SKIP_BLANKS;
6365 GROW;
6366 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006367 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006368 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006369 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 return(NULL);
6371 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006372 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006373 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006374 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006375 return(NULL);
6376 }
Owen Taylor3473f882001-02-23 17:55:21 +00006377 GROW;
6378 if (RAW == '?') {
6379 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6380 NEXT;
6381 } else if (RAW == '*') {
6382 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6383 NEXT;
6384 } else if (RAW == '+') {
6385 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6386 NEXT;
6387 } else {
6388 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6389 }
Owen Taylor3473f882001-02-23 17:55:21 +00006390 GROW;
6391 }
6392 SKIP_BLANKS;
6393 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006394 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006395 /*
6396 * Each loop we parse one separator and one element.
6397 */
6398 if (RAW == ',') {
6399 if (type == 0) type = CUR;
6400
6401 /*
6402 * Detect "Name | Name , Name" error
6403 */
6404 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006405 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006406 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006407 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006408 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006409 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006410 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006411 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006412 return(NULL);
6413 }
6414 NEXT;
6415
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006416 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006417 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006418 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006419 xmlFreeDocElementContent(ctxt->myDoc, last);
6420 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006421 return(NULL);
6422 }
6423 if (last == NULL) {
6424 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006425 if (ret != NULL)
6426 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006427 ret = cur = op;
6428 } else {
6429 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006430 if (op != NULL)
6431 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006432 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006433 if (last != NULL)
6434 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006435 cur =op;
6436 last = NULL;
6437 }
6438 } else if (RAW == '|') {
6439 if (type == 0) type = CUR;
6440
6441 /*
6442 * Detect "Name , Name | Name" error
6443 */
6444 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006445 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006446 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006447 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006448 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006449 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006450 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006451 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006452 return(NULL);
6453 }
6454 NEXT;
6455
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006456 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006457 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006458 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006459 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006460 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006461 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006462 return(NULL);
6463 }
6464 if (last == NULL) {
6465 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006466 if (ret != NULL)
6467 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006468 ret = cur = op;
6469 } else {
6470 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006471 if (op != NULL)
6472 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006473 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006474 if (last != NULL)
6475 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006476 cur =op;
6477 last = NULL;
6478 }
6479 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006480 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006481 if ((last != NULL) && (last != ret))
6482 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006483 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006484 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006485 return(NULL);
6486 }
6487 GROW;
6488 SKIP_BLANKS;
6489 GROW;
6490 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006491 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006492 /* Recurse on second child */
6493 NEXT;
6494 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006495 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6496 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006497 SKIP_BLANKS;
6498 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006499 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006500 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006501 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006502 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006503 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006504 return(NULL);
6505 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006506 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006507 if (last == NULL) {
6508 if (ret != NULL)
6509 xmlFreeDocElementContent(ctxt->myDoc, ret);
6510 return(NULL);
6511 }
Owen Taylor3473f882001-02-23 17:55:21 +00006512 if (RAW == '?') {
6513 last->ocur = XML_ELEMENT_CONTENT_OPT;
6514 NEXT;
6515 } else if (RAW == '*') {
6516 last->ocur = XML_ELEMENT_CONTENT_MULT;
6517 NEXT;
6518 } else if (RAW == '+') {
6519 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6520 NEXT;
6521 } else {
6522 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6523 }
6524 }
6525 SKIP_BLANKS;
6526 GROW;
6527 }
6528 if ((cur != NULL) && (last != NULL)) {
6529 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006530 if (last != NULL)
6531 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006532 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006533 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006534 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6535"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006536 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006537 }
Owen Taylor3473f882001-02-23 17:55:21 +00006538 NEXT;
6539 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006540 if (ret != NULL) {
6541 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6542 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6543 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6544 else
6545 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6546 }
Owen Taylor3473f882001-02-23 17:55:21 +00006547 NEXT;
6548 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006549 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006550 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006551 cur = ret;
6552 /*
6553 * Some normalization:
6554 * (a | b* | c?)* == (a | b | c)*
6555 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006556 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006557 if ((cur->c1 != NULL) &&
6558 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6560 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6561 if ((cur->c2 != NULL) &&
6562 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6563 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6564 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6565 cur = cur->c2;
6566 }
6567 }
Owen Taylor3473f882001-02-23 17:55:21 +00006568 NEXT;
6569 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006570 if (ret != NULL) {
6571 int found = 0;
6572
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006573 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6574 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6575 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006576 else
6577 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006578 /*
6579 * Some normalization:
6580 * (a | b*)+ == (a | b)*
6581 * (a | b?)+ == (a | b)*
6582 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006583 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006584 if ((cur->c1 != NULL) &&
6585 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6586 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6587 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6588 found = 1;
6589 }
6590 if ((cur->c2 != NULL) &&
6591 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6592 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6593 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6594 found = 1;
6595 }
6596 cur = cur->c2;
6597 }
6598 if (found)
6599 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6600 }
Owen Taylor3473f882001-02-23 17:55:21 +00006601 NEXT;
6602 }
6603 return(ret);
6604}
6605
6606/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006607 * xmlParseElementChildrenContentDecl:
6608 * @ctxt: an XML parser context
6609 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006610 *
6611 * parse the declaration for a Mixed Element content
6612 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6613 *
6614 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6615 *
6616 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6617 *
6618 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6619 *
6620 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6621 *
6622 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6623 * TODO Parameter-entity replacement text must be properly nested
6624 * with parenthesized groups. That is to say, if either of the
6625 * opening or closing parentheses in a choice, seq, or Mixed
6626 * construct is contained in the replacement text for a parameter
6627 * entity, both must be contained in the same replacement text. For
6628 * interoperability, if a parameter-entity reference appears in a
6629 * choice, seq, or Mixed construct, its replacement text should not
6630 * be empty, and neither the first nor last non-blank character of
6631 * the replacement text should be a connector (| or ,).
6632 *
6633 * Returns the tree of xmlElementContentPtr describing the element
6634 * hierarchy.
6635 */
6636xmlElementContentPtr
6637xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6638 /* stub left for API/ABI compat */
6639 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6640}
6641
6642/**
Owen Taylor3473f882001-02-23 17:55:21 +00006643 * xmlParseElementContentDecl:
6644 * @ctxt: an XML parser context
6645 * @name: the name of the element being defined.
6646 * @result: the Element Content pointer will be stored here if any
6647 *
6648 * parse the declaration for an Element content either Mixed or Children,
6649 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006650 *
Owen Taylor3473f882001-02-23 17:55:21 +00006651 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6652 *
6653 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6654 */
6655
6656int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006657xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006658 xmlElementContentPtr *result) {
6659
6660 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006661 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006662 int res;
6663
6664 *result = NULL;
6665
6666 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006667 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006668 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006669 return(-1);
6670 }
6671 NEXT;
6672 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006673 if (ctxt->instate == XML_PARSER_EOF)
6674 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006675 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006676 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006677 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006678 res = XML_ELEMENT_TYPE_MIXED;
6679 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006680 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006681 res = XML_ELEMENT_TYPE_ELEMENT;
6682 }
Owen Taylor3473f882001-02-23 17:55:21 +00006683 SKIP_BLANKS;
6684 *result = tree;
6685 return(res);
6686}
6687
6688/**
6689 * xmlParseElementDecl:
6690 * @ctxt: an XML parser context
6691 *
6692 * parse an Element declaration.
6693 *
6694 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6695 *
6696 * [ VC: Unique Element Type Declaration ]
6697 * No element type may be declared more than once
6698 *
6699 * Returns the type of the element, or -1 in case of error
6700 */
6701int
6702xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006703 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006704 int ret = -1;
6705 xmlElementContentPtr content = NULL;
6706
Daniel Veillard4c778d82005-01-23 17:37:44 +00006707 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006708 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006709 xmlParserInputPtr input = ctxt->input;
6710
6711 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006712 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006713 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6714 "Space required after 'ELEMENT'\n");
David Kilzer00906752016-01-26 16:57:03 -08006715 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006716 }
6717 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006718 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006719 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006720 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6721 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006722 return(-1);
6723 }
6724 while ((RAW == 0) && (ctxt->inputNr > 1))
6725 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006726 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6728 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006729 }
6730 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006731 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006732 SKIP(5);
6733 /*
6734 * Element must always be empty.
6735 */
6736 ret = XML_ELEMENT_TYPE_EMPTY;
6737 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6738 (NXT(2) == 'Y')) {
6739 SKIP(3);
6740 /*
6741 * Element is a generic container.
6742 */
6743 ret = XML_ELEMENT_TYPE_ANY;
6744 } else if (RAW == '(') {
6745 ret = xmlParseElementContentDecl(ctxt, name, &content);
6746 } else {
6747 /*
6748 * [ WFC: PEs in Internal Subset ] error handling.
6749 */
6750 if ((RAW == '%') && (ctxt->external == 0) &&
6751 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006752 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006753 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006754 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006755 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006756 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6757 }
Owen Taylor3473f882001-02-23 17:55:21 +00006758 return(-1);
6759 }
6760
6761 SKIP_BLANKS;
6762 /*
6763 * Pop-up of finished entities.
6764 */
6765 while ((RAW == 0) && (ctxt->inputNr > 1))
6766 xmlPopInput(ctxt);
6767 SKIP_BLANKS;
6768
6769 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006770 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006771 if (content != NULL) {
6772 xmlFreeDocElementContent(ctxt->myDoc, content);
6773 }
Owen Taylor3473f882001-02-23 17:55:21 +00006774 } else {
6775 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006776 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6777 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006778 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006779
Owen Taylor3473f882001-02-23 17:55:21 +00006780 NEXT;
6781 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006782 (ctxt->sax->elementDecl != NULL)) {
6783 if (content != NULL)
6784 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006785 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6786 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006787 if ((content != NULL) && (content->parent == NULL)) {
6788 /*
6789 * this is a trick: if xmlAddElementDecl is called,
6790 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006791 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006792 * interfaces or change the API/ABI
6793 */
6794 xmlFreeDocElementContent(ctxt->myDoc, content);
6795 }
6796 } else if (content != NULL) {
6797 xmlFreeDocElementContent(ctxt->myDoc, content);
6798 }
Owen Taylor3473f882001-02-23 17:55:21 +00006799 }
Owen Taylor3473f882001-02-23 17:55:21 +00006800 }
6801 return(ret);
6802}
6803
6804/**
Owen Taylor3473f882001-02-23 17:55:21 +00006805 * xmlParseConditionalSections
6806 * @ctxt: an XML parser context
6807 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006808 * [61] conditionalSect ::= includeSect | ignoreSect
6809 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006810 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6811 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6812 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6813 */
6814
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006815static void
Owen Taylor3473f882001-02-23 17:55:21 +00006816xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006817 int id = ctxt->input->id;
6818
Owen Taylor3473f882001-02-23 17:55:21 +00006819 SKIP(3);
6820 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006821 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006822 SKIP(7);
6823 SKIP_BLANKS;
6824 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006825 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006826 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006827 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006828 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006829 if (ctxt->input->id != id) {
6830 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831 "All markup of the conditional section is not in the same entity\n",
6832 NULL, NULL);
6833 }
Owen Taylor3473f882001-02-23 17:55:21 +00006834 NEXT;
6835 }
6836 if (xmlParserDebugEntities) {
6837 if ((ctxt->input != NULL) && (ctxt->input->filename))
6838 xmlGenericError(xmlGenericErrorContext,
6839 "%s(%d): ", ctxt->input->filename,
6840 ctxt->input->line);
6841 xmlGenericError(xmlGenericErrorContext,
6842 "Entering INCLUDE Conditional Section\n");
6843 }
6844
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006845 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6846 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006847 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006848 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006849
6850 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6851 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006852 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006853 NEXT;
6854 } else if (RAW == '%') {
6855 xmlParsePEReference(ctxt);
6856 } else
6857 xmlParseMarkupDecl(ctxt);
6858
6859 /*
6860 * Pop-up of finished entities.
6861 */
6862 while ((RAW == 0) && (ctxt->inputNr > 1))
6863 xmlPopInput(ctxt);
6864
Daniel Veillardfdc91562002-07-01 21:52:03 +00006865 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006866 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
David Kilzer00906752016-01-26 16:57:03 -08006867 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006868 break;
6869 }
6870 }
6871 if (xmlParserDebugEntities) {
6872 if ((ctxt->input != NULL) && (ctxt->input->filename))
6873 xmlGenericError(xmlGenericErrorContext,
6874 "%s(%d): ", ctxt->input->filename,
6875 ctxt->input->line);
6876 xmlGenericError(xmlGenericErrorContext,
6877 "Leaving INCLUDE Conditional Section\n");
6878 }
6879
Daniel Veillarda07050d2003-10-19 14:46:32 +00006880 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006881 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006882 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 int depth = 0;
6884
6885 SKIP(6);
6886 SKIP_BLANKS;
6887 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006888 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006889 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006890 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006891 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006892 if (ctxt->input->id != id) {
6893 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6894 "All markup of the conditional section is not in the same entity\n",
6895 NULL, NULL);
6896 }
Owen Taylor3473f882001-02-23 17:55:21 +00006897 NEXT;
6898 }
6899 if (xmlParserDebugEntities) {
6900 if ((ctxt->input != NULL) && (ctxt->input->filename))
6901 xmlGenericError(xmlGenericErrorContext,
6902 "%s(%d): ", ctxt->input->filename,
6903 ctxt->input->line);
6904 xmlGenericError(xmlGenericErrorContext,
6905 "Entering IGNORE Conditional Section\n");
6906 }
6907
6908 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006909 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006910 * But disable SAX event generating DTD building in the meantime
6911 */
6912 state = ctxt->disableSAX;
6913 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006914 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006915 ctxt->instate = XML_PARSER_IGNORE;
6916
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006917 while (((depth >= 0) && (RAW != 0)) &&
6918 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006919 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6920 depth++;
6921 SKIP(3);
6922 continue;
6923 }
6924 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6925 if (--depth >= 0) SKIP(3);
6926 continue;
6927 }
6928 NEXT;
6929 continue;
6930 }
6931
6932 ctxt->disableSAX = state;
6933 ctxt->instate = instate;
6934
6935 if (xmlParserDebugEntities) {
6936 if ((ctxt->input != NULL) && (ctxt->input->filename))
6937 xmlGenericError(xmlGenericErrorContext,
6938 "%s(%d): ", ctxt->input->filename,
6939 ctxt->input->line);
6940 xmlGenericError(xmlGenericErrorContext,
6941 "Leaving IGNORE Conditional Section\n");
6942 }
6943
6944 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006945 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006946 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006947 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006948 }
6949
6950 if (RAW == 0)
6951 SHRINK;
6952
6953 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006954 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006955 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006956 if (ctxt->input->id != id) {
6957 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6958 "All markup of the conditional section is not in the same entity\n",
6959 NULL, NULL);
6960 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006961 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006962 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006963 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006964 }
6965}
6966
6967/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006968 * xmlParseMarkupDecl:
6969 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006970 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006971 * parse Markup declarations
6972 *
6973 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6974 * NotationDecl | PI | Comment
6975 *
6976 * [ VC: Proper Declaration/PE Nesting ]
6977 * Parameter-entity replacement text must be properly nested with
6978 * markup declarations. That is to say, if either the first character
6979 * or the last character of a markup declaration (markupdecl above) is
6980 * contained in the replacement text for a parameter-entity reference,
6981 * both must be contained in the same replacement text.
6982 *
6983 * [ WFC: PEs in Internal Subset ]
6984 * In the internal DTD subset, parameter-entity references can occur
6985 * only where markup declarations can occur, not within markup declarations.
6986 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006987 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006988 */
6989void
6990xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6991 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006992 if (CUR == '<') {
6993 if (NXT(1) == '!') {
6994 switch (NXT(2)) {
6995 case 'E':
6996 if (NXT(3) == 'L')
6997 xmlParseElementDecl(ctxt);
6998 else if (NXT(3) == 'N')
6999 xmlParseEntityDecl(ctxt);
7000 break;
7001 case 'A':
7002 xmlParseAttributeListDecl(ctxt);
7003 break;
7004 case 'N':
7005 xmlParseNotationDecl(ctxt);
7006 break;
7007 case '-':
7008 xmlParseComment(ctxt);
7009 break;
7010 default:
7011 /* there is an error but it will be detected later */
7012 break;
7013 }
7014 } else if (NXT(1) == '?') {
7015 xmlParsePI(ctxt);
7016 }
7017 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08007018
7019 /*
7020 * detect requirement to exit there and act accordingly
7021 * and avoid having instate overriden later on
7022 */
7023 if (ctxt->instate == XML_PARSER_EOF)
7024 return;
7025
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007026 /*
7027 * This is only for internal subset. On external entities,
7028 * the replacement is done before parsing stage
7029 */
7030 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7031 xmlParsePEReference(ctxt);
7032
7033 /*
7034 * Conditional sections are allowed from entities included
7035 * by PE References in the internal subset.
7036 */
7037 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7038 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7039 xmlParseConditionalSections(ctxt);
7040 }
7041 }
7042
7043 ctxt->instate = XML_PARSER_DTD;
7044}
7045
7046/**
7047 * xmlParseTextDecl:
7048 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00007049 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007050 * parse an XML declaration header for external entities
7051 *
7052 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007053 */
7054
7055void
7056xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7057 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007058 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007059
7060 /*
7061 * We know that '<?xml' is here.
7062 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007063 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007064 SKIP(5);
7065 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007066 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007067 return;
7068 }
7069
William M. Brack76e95df2003-10-18 16:20:14 +00007070 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007071 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7072 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007073 }
7074 SKIP_BLANKS;
7075
7076 /*
7077 * We may have the VersionInfo here.
7078 */
7079 version = xmlParseVersionInfo(ctxt);
7080 if (version == NULL)
7081 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00007082 else {
William M. Brack76e95df2003-10-18 16:20:14 +00007083 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007084 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7085 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00007086 }
7087 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007088 ctxt->input->version = version;
7089
7090 /*
7091 * We must have the encoding declaration
7092 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007093 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007094 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7095 /*
7096 * The XML REC instructs us to stop parsing right here
7097 */
7098 return;
7099 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007100 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7101 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7102 "Missing encoding in text declaration\n");
7103 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007104
7105 SKIP_BLANKS;
7106 if ((RAW == '?') && (NXT(1) == '>')) {
7107 SKIP(2);
7108 } else if (RAW == '>') {
7109 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007110 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007111 NEXT;
7112 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007113 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007114 MOVETO_ENDTAG(CUR_PTR);
7115 NEXT;
7116 }
7117}
7118
7119/**
Owen Taylor3473f882001-02-23 17:55:21 +00007120 * xmlParseExternalSubset:
7121 * @ctxt: an XML parser context
7122 * @ExternalID: the external identifier
7123 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007124 *
Owen Taylor3473f882001-02-23 17:55:21 +00007125 * parse Markup declarations from an external subset
7126 *
7127 * [30] extSubset ::= textDecl? extSubsetDecl
7128 *
7129 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7130 */
7131void
7132xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7133 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007134 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007135 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007136
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007137 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007138 (ctxt->input->end - ctxt->input->cur >= 4)) {
7139 xmlChar start[4];
7140 xmlCharEncoding enc;
7141
7142 start[0] = RAW;
7143 start[1] = NXT(1);
7144 start[2] = NXT(2);
7145 start[3] = NXT(3);
7146 enc = xmlDetectCharEncoding(start, 4);
7147 if (enc != XML_CHAR_ENCODING_NONE)
7148 xmlSwitchEncoding(ctxt, enc);
7149 }
7150
Daniel Veillarda07050d2003-10-19 14:46:32 +00007151 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007152 xmlParseTextDecl(ctxt);
7153 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7154 /*
7155 * The XML REC instructs us to stop parsing right here
7156 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007157 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007158 return;
7159 }
7160 }
7161 if (ctxt->myDoc == NULL) {
7162 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007163 if (ctxt->myDoc == NULL) {
7164 xmlErrMemory(ctxt, "New Doc failed");
7165 return;
7166 }
7167 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007168 }
7169 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7170 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7171
7172 ctxt->instate = XML_PARSER_DTD;
7173 ctxt->external = 1;
7174 while (((RAW == '<') && (NXT(1) == '?')) ||
7175 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007176 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007177 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007178 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007179
7180 GROW;
7181 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7182 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007183 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007184 NEXT;
7185 } else if (RAW == '%') {
7186 xmlParsePEReference(ctxt);
7187 } else
7188 xmlParseMarkupDecl(ctxt);
7189
7190 /*
7191 * Pop-up of finished entities.
7192 */
7193 while ((RAW == 0) && (ctxt->inputNr > 1))
7194 xmlPopInput(ctxt);
7195
Daniel Veillardfdc91562002-07-01 21:52:03 +00007196 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007197 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007198 break;
7199 }
7200 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007201
Owen Taylor3473f882001-02-23 17:55:21 +00007202 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007203 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007204 }
7205
7206}
7207
7208/**
7209 * xmlParseReference:
7210 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007211 *
Owen Taylor3473f882001-02-23 17:55:21 +00007212 * parse and handle entity references in content, depending on the SAX
7213 * interface, this may end-up in a call to character() if this is a
7214 * CharRef, a predefined entity, if there is no reference() callback.
7215 * or if the parser was asked to switch to that mode.
7216 *
7217 * [67] Reference ::= EntityRef | CharRef
7218 */
7219void
7220xmlParseReference(xmlParserCtxtPtr ctxt) {
7221 xmlEntityPtr ent;
7222 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007223 int was_checked;
7224 xmlNodePtr list = NULL;
7225 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007226
Daniel Veillard0161e632008-08-28 15:36:32 +00007227
7228 if (RAW != '&')
7229 return;
7230
7231 /*
7232 * Simple case of a CharRef
7233 */
Owen Taylor3473f882001-02-23 17:55:21 +00007234 if (NXT(1) == '#') {
7235 int i = 0;
7236 xmlChar out[10];
7237 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007238 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007239
Daniel Veillarddc171602008-03-26 17:41:38 +00007240 if (value == 0)
7241 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007242 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7243 /*
7244 * So we are using non-UTF-8 buffers
7245 * Check that the char fit on 8bits, if not
7246 * generate a CharRef.
7247 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007248 if (value <= 0xFF) {
7249 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007250 out[1] = 0;
7251 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7252 (!ctxt->disableSAX))
7253 ctxt->sax->characters(ctxt->userData, out, 1);
7254 } else {
7255 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007256 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007257 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007258 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007259 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7260 (!ctxt->disableSAX))
7261 ctxt->sax->reference(ctxt->userData, out);
7262 }
7263 } else {
7264 /*
7265 * Just encode the value in UTF-8
7266 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007267 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007268 out[i] = 0;
7269 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7270 (!ctxt->disableSAX))
7271 ctxt->sax->characters(ctxt->userData, out, i);
7272 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007273 return;
7274 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007275
Daniel Veillard0161e632008-08-28 15:36:32 +00007276 /*
7277 * We are seeing an entity reference
7278 */
7279 ent = xmlParseEntityRef(ctxt);
7280 if (ent == NULL) return;
7281 if (!ctxt->wellFormed)
7282 return;
7283 was_checked = ent->checked;
7284
7285 /* special case of predefined entities */
7286 if ((ent->name == NULL) ||
7287 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7288 val = ent->content;
7289 if (val == NULL) return;
7290 /*
7291 * inline the entity.
7292 */
7293 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7294 (!ctxt->disableSAX))
7295 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7296 return;
7297 }
7298
7299 /*
7300 * The first reference to the entity trigger a parsing phase
7301 * where the ent->children is filled with the result from
7302 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007303 * Note: external parsed entities will not be loaded, it is not
7304 * required for a non-validating parser, unless the parsing option
7305 * of validating, or substituting entities were given. Doing so is
7306 * far more secure as the parser will only process data coming from
7307 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007308 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007309 if (((ent->checked == 0) ||
7310 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007311 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7312 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007313 unsigned long oldnbent = ctxt->nbentities;
7314
7315 /*
7316 * This is a bit hackish but this seems the best
7317 * way to make sure both SAX and DOM entity support
7318 * behaves okay.
7319 */
7320 void *user_data;
7321 if (ctxt->userData == ctxt)
7322 user_data = NULL;
7323 else
7324 user_data = ctxt->userData;
7325
7326 /*
7327 * Check that this entity is well formed
7328 * 4.3.2: An internal general parsed entity is well-formed
7329 * if its replacement text matches the production labeled
7330 * content.
7331 */
7332 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7333 ctxt->depth++;
7334 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7335 user_data, &list);
7336 ctxt->depth--;
7337
7338 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7339 ctxt->depth++;
7340 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7341 user_data, ctxt->depth, ent->URI,
7342 ent->ExternalID, &list);
7343 ctxt->depth--;
7344 } else {
7345 ret = XML_ERR_ENTITY_PE_INTERNAL;
7346 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7347 "invalid entity type found\n", NULL);
7348 }
7349
7350 /*
7351 * Store the number of entities needing parsing for this entity
7352 * content and do checkings
7353 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007354 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7355 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7356 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007357 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007358 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007359 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007360 return;
7361 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007362 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007363 xmlFreeNodeList(list);
7364 return;
7365 }
Owen Taylor3473f882001-02-23 17:55:21 +00007366
Daniel Veillard0161e632008-08-28 15:36:32 +00007367 if ((ret == XML_ERR_OK) && (list != NULL)) {
7368 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7369 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7370 (ent->children == NULL)) {
7371 ent->children = list;
7372 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007373 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007374 * Prune it directly in the generated document
7375 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007376 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007377 if (((list->type == XML_TEXT_NODE) &&
7378 (list->next == NULL)) ||
7379 (ctxt->parseMode == XML_PARSE_READER)) {
7380 list->parent = (xmlNodePtr) ent;
7381 list = NULL;
7382 ent->owner = 1;
7383 } else {
7384 ent->owner = 0;
7385 while (list != NULL) {
7386 list->parent = (xmlNodePtr) ctxt->node;
7387 list->doc = ctxt->myDoc;
7388 if (list->next == NULL)
7389 ent->last = list;
7390 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007391 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007392 list = ent->children;
7393#ifdef LIBXML_LEGACY_ENABLED
7394 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7395 xmlAddEntityReference(ent, list, NULL);
7396#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007397 }
7398 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007399 ent->owner = 1;
7400 while (list != NULL) {
7401 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007402 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007403 if (list->next == NULL)
7404 ent->last = list;
7405 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007406 }
7407 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007408 } else {
7409 xmlFreeNodeList(list);
7410 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007411 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007412 } else if ((ret != XML_ERR_OK) &&
7413 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7414 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7415 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007416 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007417 } else if (list != NULL) {
7418 xmlFreeNodeList(list);
7419 list = NULL;
7420 }
7421 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007422 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007423 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007424 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007425 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007426
Daniel Veillard0161e632008-08-28 15:36:32 +00007427 /*
7428 * Now that the entity content has been gathered
7429 * provide it to the application, this can take different forms based
7430 * on the parsing modes.
7431 */
7432 if (ent->children == NULL) {
7433 /*
7434 * Probably running in SAX mode and the callbacks don't
7435 * build the entity content. So unless we already went
7436 * though parsing for first checking go though the entity
7437 * content to generate callbacks associated to the entity
7438 */
7439 if (was_checked != 0) {
7440 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007441 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007442 * This is a bit hackish but this seems the best
7443 * way to make sure both SAX and DOM entity support
7444 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007445 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007446 if (ctxt->userData == ctxt)
7447 user_data = NULL;
7448 else
7449 user_data = ctxt->userData;
7450
7451 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7452 ctxt->depth++;
7453 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7454 ent->content, user_data, NULL);
7455 ctxt->depth--;
7456 } else if (ent->etype ==
7457 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7458 ctxt->depth++;
7459 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7460 ctxt->sax, user_data, ctxt->depth,
7461 ent->URI, ent->ExternalID, NULL);
7462 ctxt->depth--;
7463 } else {
7464 ret = XML_ERR_ENTITY_PE_INTERNAL;
7465 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7466 "invalid entity type found\n", NULL);
7467 }
7468 if (ret == XML_ERR_ENTITY_LOOP) {
7469 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7470 return;
7471 }
7472 }
7473 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7474 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7475 /*
7476 * Entity reference callback comes second, it's somewhat
7477 * superfluous but a compatibility to historical behaviour
7478 */
7479 ctxt->sax->reference(ctxt->userData, ent->name);
7480 }
7481 return;
7482 }
7483
7484 /*
7485 * If we didn't get any children for the entity being built
7486 */
7487 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7488 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7489 /*
7490 * Create a node.
7491 */
7492 ctxt->sax->reference(ctxt->userData, ent->name);
7493 return;
7494 }
7495
7496 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7497 /*
7498 * There is a problem on the handling of _private for entities
7499 * (bug 155816): Should we copy the content of the field from
7500 * the entity (possibly overwriting some value set by the user
7501 * when a copy is created), should we leave it alone, or should
7502 * we try to take care of different situations? The problem
7503 * is exacerbated by the usage of this field by the xmlReader.
7504 * To fix this bug, we look at _private on the created node
7505 * and, if it's NULL, we copy in whatever was in the entity.
7506 * If it's not NULL we leave it alone. This is somewhat of a
7507 * hack - maybe we should have further tests to determine
7508 * what to do.
7509 */
7510 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7511 /*
7512 * Seems we are generating the DOM content, do
7513 * a simple tree copy for all references except the first
7514 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007515 */
7516 if (((list == NULL) && (ent->owner == 0)) ||
7517 (ctxt->parseMode == XML_PARSE_READER)) {
7518 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7519
7520 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007521 * We are copying here, make sure there is no abuse
7522 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007523 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007524 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7525 return;
7526
7527 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007528 * when operating on a reader, the entities definitions
7529 * are always owning the entities subtree.
7530 if (ctxt->parseMode == XML_PARSE_READER)
7531 ent->owner = 1;
7532 */
7533
7534 cur = ent->children;
7535 while (cur != NULL) {
7536 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7537 if (nw != NULL) {
7538 if (nw->_private == NULL)
7539 nw->_private = cur->_private;
7540 if (firstChild == NULL){
7541 firstChild = nw;
7542 }
7543 nw = xmlAddChild(ctxt->node, nw);
7544 }
7545 if (cur == ent->last) {
7546 /*
7547 * needed to detect some strange empty
7548 * node cases in the reader tests
7549 */
7550 if ((ctxt->parseMode == XML_PARSE_READER) &&
7551 (nw != NULL) &&
7552 (nw->type == XML_ELEMENT_NODE) &&
7553 (nw->children == NULL))
7554 nw->extra = 1;
7555
7556 break;
7557 }
7558 cur = cur->next;
7559 }
7560#ifdef LIBXML_LEGACY_ENABLED
7561 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7562 xmlAddEntityReference(ent, firstChild, nw);
7563#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007564 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007565 xmlNodePtr nw = NULL, cur, next, last,
7566 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007567
7568 /*
7569 * We are copying here, make sure there is no abuse
7570 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007571 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007572 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7573 return;
7574
Daniel Veillard0161e632008-08-28 15:36:32 +00007575 /*
7576 * Copy the entity child list and make it the new
7577 * entity child list. The goal is to make sure any
7578 * ID or REF referenced will be the one from the
7579 * document content and not the entity copy.
7580 */
7581 cur = ent->children;
7582 ent->children = NULL;
7583 last = ent->last;
7584 ent->last = NULL;
7585 while (cur != NULL) {
7586 next = cur->next;
7587 cur->next = NULL;
7588 cur->parent = NULL;
7589 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7590 if (nw != NULL) {
7591 if (nw->_private == NULL)
7592 nw->_private = cur->_private;
7593 if (firstChild == NULL){
7594 firstChild = cur;
7595 }
7596 xmlAddChild((xmlNodePtr) ent, nw);
7597 xmlAddChild(ctxt->node, cur);
7598 }
7599 if (cur == last)
7600 break;
7601 cur = next;
7602 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007603 if (ent->owner == 0)
7604 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007605#ifdef LIBXML_LEGACY_ENABLED
7606 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7607 xmlAddEntityReference(ent, firstChild, nw);
7608#endif /* LIBXML_LEGACY_ENABLED */
7609 } else {
7610 const xmlChar *nbktext;
7611
7612 /*
7613 * the name change is to avoid coalescing of the
7614 * node with a possible previous text one which
7615 * would make ent->children a dangling pointer
7616 */
7617 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7618 -1);
7619 if (ent->children->type == XML_TEXT_NODE)
7620 ent->children->name = nbktext;
7621 if ((ent->last != ent->children) &&
7622 (ent->last->type == XML_TEXT_NODE))
7623 ent->last->name = nbktext;
7624 xmlAddChildList(ctxt->node, ent->children);
7625 }
7626
7627 /*
7628 * This is to avoid a nasty side effect, see
7629 * characters() in SAX.c
7630 */
7631 ctxt->nodemem = 0;
7632 ctxt->nodelen = 0;
7633 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007634 }
7635 }
7636}
7637
7638/**
7639 * xmlParseEntityRef:
7640 * @ctxt: an XML parser context
7641 *
7642 * parse ENTITY references declarations
7643 *
7644 * [68] EntityRef ::= '&' Name ';'
7645 *
7646 * [ WFC: Entity Declared ]
7647 * In a document without any DTD, a document with only an internal DTD
7648 * subset which contains no parameter entity references, or a document
7649 * with "standalone='yes'", the Name given in the entity reference
7650 * must match that in an entity declaration, except that well-formed
7651 * documents need not declare any of the following entities: amp, lt,
7652 * gt, apos, quot. The declaration of a parameter entity must precede
7653 * any reference to it. Similarly, the declaration of a general entity
7654 * must precede any reference to it which appears in a default value in an
7655 * attribute-list declaration. Note that if entities are declared in the
7656 * external subset or in external parameter entities, a non-validating
7657 * processor is not obligated to read and process their declarations;
7658 * for such documents, the rule that an entity must be declared is a
7659 * well-formedness constraint only if standalone='yes'.
7660 *
7661 * [ WFC: Parsed Entity ]
7662 * An entity reference must not contain the name of an unparsed entity
7663 *
7664 * Returns the xmlEntityPtr if found, or NULL otherwise.
7665 */
7666xmlEntityPtr
7667xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007668 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007669 xmlEntityPtr ent = NULL;
7670
7671 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007672 if (ctxt->instate == XML_PARSER_EOF)
7673 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007674
Daniel Veillard0161e632008-08-28 15:36:32 +00007675 if (RAW != '&')
7676 return(NULL);
7677 NEXT;
7678 name = xmlParseName(ctxt);
7679 if (name == NULL) {
7680 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7681 "xmlParseEntityRef: no name\n");
7682 return(NULL);
7683 }
7684 if (RAW != ';') {
7685 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7686 return(NULL);
7687 }
7688 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007689
Daniel Veillard0161e632008-08-28 15:36:32 +00007690 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007691 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007692 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007693 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7694 ent = xmlGetPredefinedEntity(name);
7695 if (ent != NULL)
7696 return(ent);
7697 }
Owen Taylor3473f882001-02-23 17:55:21 +00007698
Daniel Veillard0161e632008-08-28 15:36:32 +00007699 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007700 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007701 */
7702 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007703
Daniel Veillard0161e632008-08-28 15:36:32 +00007704 /*
7705 * Ask first SAX for entity resolution, otherwise try the
7706 * entities which may have stored in the parser context.
7707 */
7708 if (ctxt->sax != NULL) {
7709 if (ctxt->sax->getEntity != NULL)
7710 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007711 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007712 (ctxt->options & XML_PARSE_OLDSAX))
7713 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007714 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7715 (ctxt->userData==ctxt)) {
7716 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007717 }
7718 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007719 if (ctxt->instate == XML_PARSER_EOF)
7720 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007721 /*
7722 * [ WFC: Entity Declared ]
7723 * In a document without any DTD, a document with only an
7724 * internal DTD subset which contains no parameter entity
7725 * references, or a document with "standalone='yes'", the
7726 * Name given in the entity reference must match that in an
7727 * entity declaration, except that well-formed documents
7728 * need not declare any of the following entities: amp, lt,
7729 * gt, apos, quot.
7730 * The declaration of a parameter entity must precede any
7731 * reference to it.
7732 * Similarly, the declaration of a general entity must
7733 * precede any reference to it which appears in a default
7734 * value in an attribute-list declaration. Note that if
7735 * entities are declared in the external subset or in
7736 * external parameter entities, a non-validating processor
7737 * is not obligated to read and process their declarations;
7738 * for such documents, the rule that an entity must be
7739 * declared is a well-formedness constraint only if
7740 * standalone='yes'.
7741 */
7742 if (ent == NULL) {
7743 if ((ctxt->standalone == 1) ||
7744 ((ctxt->hasExternalSubset == 0) &&
7745 (ctxt->hasPErefs == 0))) {
7746 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7747 "Entity '%s' not defined\n", name);
7748 } else {
7749 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7750 "Entity '%s' not defined\n", name);
7751 if ((ctxt->inSubset == 0) &&
7752 (ctxt->sax != NULL) &&
7753 (ctxt->sax->reference != NULL)) {
7754 ctxt->sax->reference(ctxt->userData, name);
7755 }
7756 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007757 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007758 ctxt->valid = 0;
7759 }
7760
7761 /*
7762 * [ WFC: Parsed Entity ]
7763 * An entity reference must not contain the name of an
7764 * unparsed entity
7765 */
7766 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7767 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7768 "Entity reference to unparsed entity %s\n", name);
7769 }
7770
7771 /*
7772 * [ WFC: No External Entity References ]
7773 * Attribute values cannot contain direct or indirect
7774 * entity references to external entities.
7775 */
7776 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7777 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7778 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7779 "Attribute references external entity '%s'\n", name);
7780 }
7781 /*
7782 * [ WFC: No < in Attribute Values ]
7783 * The replacement text of any entity referred to directly or
7784 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007785 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007786 */
7787 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007788 (ent != NULL) &&
7789 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007790 if (((ent->checked & 1) || (ent->checked == 0)) &&
7791 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007792 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7793 "'<' in entity '%s' is not allowed in attributes values\n", name);
7794 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007795 }
7796
7797 /*
7798 * Internal check, no parameter entities here ...
7799 */
7800 else {
7801 switch (ent->etype) {
7802 case XML_INTERNAL_PARAMETER_ENTITY:
7803 case XML_EXTERNAL_PARAMETER_ENTITY:
7804 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7805 "Attempt to reference the parameter entity '%s'\n",
7806 name);
7807 break;
7808 default:
7809 break;
7810 }
7811 }
7812
7813 /*
7814 * [ WFC: No Recursion ]
7815 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007816 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007817 * Done somewhere else
7818 */
Owen Taylor3473f882001-02-23 17:55:21 +00007819 return(ent);
7820}
7821
7822/**
7823 * xmlParseStringEntityRef:
7824 * @ctxt: an XML parser context
7825 * @str: a pointer to an index in the string
7826 *
7827 * parse ENTITY references declarations, but this version parses it from
7828 * a string value.
7829 *
7830 * [68] EntityRef ::= '&' Name ';'
7831 *
7832 * [ WFC: Entity Declared ]
7833 * In a document without any DTD, a document with only an internal DTD
7834 * subset which contains no parameter entity references, or a document
7835 * with "standalone='yes'", the Name given in the entity reference
7836 * must match that in an entity declaration, except that well-formed
7837 * documents need not declare any of the following entities: amp, lt,
7838 * gt, apos, quot. The declaration of a parameter entity must precede
7839 * any reference to it. Similarly, the declaration of a general entity
7840 * must precede any reference to it which appears in a default value in an
7841 * attribute-list declaration. Note that if entities are declared in the
7842 * external subset or in external parameter entities, a non-validating
7843 * processor is not obligated to read and process their declarations;
7844 * for such documents, the rule that an entity must be declared is a
7845 * well-formedness constraint only if standalone='yes'.
7846 *
7847 * [ WFC: Parsed Entity ]
7848 * An entity reference must not contain the name of an unparsed entity
7849 *
7850 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7851 * is updated to the current location in the string.
7852 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007853static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007854xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7855 xmlChar *name;
7856 const xmlChar *ptr;
7857 xmlChar cur;
7858 xmlEntityPtr ent = NULL;
7859
7860 if ((str == NULL) || (*str == NULL))
7861 return(NULL);
7862 ptr = *str;
7863 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007864 if (cur != '&')
7865 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007866
Daniel Veillard0161e632008-08-28 15:36:32 +00007867 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007868 name = xmlParseStringName(ctxt, &ptr);
7869 if (name == NULL) {
7870 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7871 "xmlParseStringEntityRef: no name\n");
7872 *str = ptr;
7873 return(NULL);
7874 }
7875 if (*ptr != ';') {
7876 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007877 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007878 *str = ptr;
7879 return(NULL);
7880 }
7881 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007882
Owen Taylor3473f882001-02-23 17:55:21 +00007883
Daniel Veillard0161e632008-08-28 15:36:32 +00007884 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007885 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007886 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007887 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7888 ent = xmlGetPredefinedEntity(name);
7889 if (ent != NULL) {
7890 xmlFree(name);
7891 *str = ptr;
7892 return(ent);
7893 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007894 }
Owen Taylor3473f882001-02-23 17:55:21 +00007895
Daniel Veillard0161e632008-08-28 15:36:32 +00007896 /*
7897 * Increate the number of entity references parsed
7898 */
7899 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007900
Daniel Veillard0161e632008-08-28 15:36:32 +00007901 /*
7902 * Ask first SAX for entity resolution, otherwise try the
7903 * entities which may have stored in the parser context.
7904 */
7905 if (ctxt->sax != NULL) {
7906 if (ctxt->sax->getEntity != NULL)
7907 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007908 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7909 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007910 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7911 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007912 }
7913 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007914 if (ctxt->instate == XML_PARSER_EOF) {
7915 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007916 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007917 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007918
7919 /*
7920 * [ WFC: Entity Declared ]
7921 * In a document without any DTD, a document with only an
7922 * internal DTD subset which contains no parameter entity
7923 * references, or a document with "standalone='yes'", the
7924 * Name given in the entity reference must match that in an
7925 * entity declaration, except that well-formed documents
7926 * need not declare any of the following entities: amp, lt,
7927 * gt, apos, quot.
7928 * The declaration of a parameter entity must precede any
7929 * reference to it.
7930 * Similarly, the declaration of a general entity must
7931 * precede any reference to it which appears in a default
7932 * value in an attribute-list declaration. Note that if
7933 * entities are declared in the external subset or in
7934 * external parameter entities, a non-validating processor
7935 * is not obligated to read and process their declarations;
7936 * for such documents, the rule that an entity must be
7937 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007938 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007939 */
7940 if (ent == NULL) {
7941 if ((ctxt->standalone == 1) ||
7942 ((ctxt->hasExternalSubset == 0) &&
7943 (ctxt->hasPErefs == 0))) {
7944 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7945 "Entity '%s' not defined\n", name);
7946 } else {
7947 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7948 "Entity '%s' not defined\n",
7949 name);
7950 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007951 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007952 /* TODO ? check regressions ctxt->valid = 0; */
7953 }
7954
7955 /*
7956 * [ WFC: Parsed Entity ]
7957 * An entity reference must not contain the name of an
7958 * unparsed entity
7959 */
7960 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7961 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7962 "Entity reference to unparsed entity %s\n", name);
7963 }
7964
7965 /*
7966 * [ WFC: No External Entity References ]
7967 * Attribute values cannot contain direct or indirect
7968 * entity references to external entities.
7969 */
7970 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7971 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7972 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7973 "Attribute references external entity '%s'\n", name);
7974 }
7975 /*
7976 * [ WFC: No < in Attribute Values ]
7977 * The replacement text of any entity referred to directly or
7978 * indirectly in an attribute value (other than "&lt;") must
7979 * not contain a <.
7980 */
7981 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7982 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007983 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007984 (xmlStrchr(ent->content, '<'))) {
7985 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7986 "'<' in entity '%s' is not allowed in attributes values\n",
7987 name);
7988 }
7989
7990 /*
7991 * Internal check, no parameter entities here ...
7992 */
7993 else {
7994 switch (ent->etype) {
7995 case XML_INTERNAL_PARAMETER_ENTITY:
7996 case XML_EXTERNAL_PARAMETER_ENTITY:
7997 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7998 "Attempt to reference the parameter entity '%s'\n",
7999 name);
8000 break;
8001 default:
8002 break;
8003 }
8004 }
8005
8006 /*
8007 * [ WFC: No Recursion ]
8008 * A parsed entity must not contain a recursive reference
8009 * to itself, either directly or indirectly.
8010 * Done somewhere else
8011 */
8012
8013 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008014 *str = ptr;
8015 return(ent);
8016}
8017
8018/**
8019 * xmlParsePEReference:
8020 * @ctxt: an XML parser context
8021 *
8022 * parse PEReference declarations
8023 * The entity content is handled directly by pushing it's content as
8024 * a new input stream.
8025 *
8026 * [69] PEReference ::= '%' Name ';'
8027 *
8028 * [ WFC: No Recursion ]
8029 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008030 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008031 *
8032 * [ WFC: Entity Declared ]
8033 * In a document without any DTD, a document with only an internal DTD
8034 * subset which contains no parameter entity references, or a document
8035 * with "standalone='yes'", ... ... The declaration of a parameter
8036 * entity must precede any reference to it...
8037 *
8038 * [ VC: Entity Declared ]
8039 * In a document with an external subset or external parameter entities
8040 * with "standalone='no'", ... ... The declaration of a parameter entity
8041 * must precede any reference to it...
8042 *
8043 * [ WFC: In DTD ]
8044 * Parameter-entity references may only appear in the DTD.
8045 * NOTE: misleading but this is handled.
8046 */
8047void
Daniel Veillard8f597c32003-10-06 08:19:27 +00008048xmlParsePEReference(xmlParserCtxtPtr ctxt)
8049{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008050 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008051 xmlEntityPtr entity = NULL;
8052 xmlParserInputPtr input;
8053
Daniel Veillard0161e632008-08-28 15:36:32 +00008054 if (RAW != '%')
8055 return;
8056 NEXT;
8057 name = xmlParseName(ctxt);
8058 if (name == NULL) {
8059 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8060 "xmlParsePEReference: no name\n");
8061 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008062 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008063 if (RAW != ';') {
8064 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8065 return;
8066 }
8067
8068 NEXT;
8069
8070 /*
8071 * Increate the number of entity references parsed
8072 */
8073 ctxt->nbentities++;
8074
8075 /*
8076 * Request the entity from SAX
8077 */
8078 if ((ctxt->sax != NULL) &&
8079 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008080 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8081 if (ctxt->instate == XML_PARSER_EOF)
8082 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00008083 if (entity == NULL) {
8084 /*
8085 * [ WFC: Entity Declared ]
8086 * In a document without any DTD, a document with only an
8087 * internal DTD subset which contains no parameter entity
8088 * references, or a document with "standalone='yes'", ...
8089 * ... The declaration of a parameter entity must precede
8090 * any reference to it...
8091 */
8092 if ((ctxt->standalone == 1) ||
8093 ((ctxt->hasExternalSubset == 0) &&
8094 (ctxt->hasPErefs == 0))) {
8095 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8096 "PEReference: %%%s; not found\n",
8097 name);
8098 } else {
8099 /*
8100 * [ VC: Entity Declared ]
8101 * In a document with an external subset or external
8102 * parameter entities with "standalone='no'", ...
8103 * ... The declaration of a parameter entity must
8104 * precede any reference to it...
8105 */
8106 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8107 "PEReference: %%%s; not found\n",
8108 name, NULL);
8109 ctxt->valid = 0;
8110 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008111 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008112 } else {
8113 /*
8114 * Internal checking in case the entity quest barfed
8115 */
8116 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8117 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8118 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8119 "Internal: %%%s; is not a parameter entity\n",
8120 name, NULL);
8121 } else if (ctxt->input->free != deallocblankswrapper) {
8122 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8123 if (xmlPushInput(ctxt, input) < 0)
8124 return;
8125 } else {
8126 /*
8127 * TODO !!!
8128 * handle the extra spaces added before and after
8129 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8130 */
8131 input = xmlNewEntityInputStream(ctxt, entity);
8132 if (xmlPushInput(ctxt, input) < 0)
8133 return;
8134 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8135 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8136 (IS_BLANK_CH(NXT(5)))) {
8137 xmlParseTextDecl(ctxt);
8138 if (ctxt->errNo ==
8139 XML_ERR_UNSUPPORTED_ENCODING) {
8140 /*
8141 * The XML REC instructs us to stop parsing
8142 * right here
8143 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08008144 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00008145 return;
8146 }
8147 }
8148 }
8149 }
8150 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008151}
8152
8153/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008154 * xmlLoadEntityContent:
8155 * @ctxt: an XML parser context
8156 * @entity: an unloaded system entity
8157 *
8158 * Load the original content of the given system entity from the
8159 * ExternalID/SystemID given. This is to be used for Included in Literal
8160 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8161 *
8162 * Returns 0 in case of success and -1 in case of failure
8163 */
8164static int
8165xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8166 xmlParserInputPtr input;
8167 xmlBufferPtr buf;
8168 int l, c;
8169 int count = 0;
8170
8171 if ((ctxt == NULL) || (entity == NULL) ||
8172 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8173 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8174 (entity->content != NULL)) {
8175 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8176 "xmlLoadEntityContent parameter error");
8177 return(-1);
8178 }
8179
8180 if (xmlParserDebugEntities)
8181 xmlGenericError(xmlGenericErrorContext,
8182 "Reading %s entity content input\n", entity->name);
8183
8184 buf = xmlBufferCreate();
8185 if (buf == NULL) {
8186 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8187 "xmlLoadEntityContent parameter error");
8188 return(-1);
8189 }
8190
8191 input = xmlNewEntityInputStream(ctxt, entity);
8192 if (input == NULL) {
8193 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8194 "xmlLoadEntityContent input error");
8195 xmlBufferFree(buf);
8196 return(-1);
8197 }
8198
8199 /*
8200 * Push the entity as the current input, read char by char
8201 * saving to the buffer until the end of the entity or an error
8202 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008203 if (xmlPushInput(ctxt, input) < 0) {
8204 xmlBufferFree(buf);
8205 return(-1);
8206 }
8207
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008208 GROW;
8209 c = CUR_CHAR(l);
8210 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8211 (IS_CHAR(c))) {
8212 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008213 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008214 count = 0;
8215 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008216 if (ctxt->instate == XML_PARSER_EOF) {
8217 xmlBufferFree(buf);
8218 return(-1);
8219 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008220 }
8221 NEXTL(l);
8222 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008223 if (c == 0) {
8224 count = 0;
8225 GROW;
8226 if (ctxt->instate == XML_PARSER_EOF) {
8227 xmlBufferFree(buf);
8228 return(-1);
8229 }
8230 c = CUR_CHAR(l);
8231 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008232 }
8233
8234 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8235 xmlPopInput(ctxt);
8236 } else if (!IS_CHAR(c)) {
8237 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8238 "xmlLoadEntityContent: invalid char value %d\n",
8239 c);
8240 xmlBufferFree(buf);
8241 return(-1);
8242 }
8243 entity->content = buf->content;
8244 buf->content = NULL;
8245 xmlBufferFree(buf);
8246
8247 return(0);
8248}
8249
8250/**
Owen Taylor3473f882001-02-23 17:55:21 +00008251 * xmlParseStringPEReference:
8252 * @ctxt: an XML parser context
8253 * @str: a pointer to an index in the string
8254 *
8255 * parse PEReference declarations
8256 *
8257 * [69] PEReference ::= '%' Name ';'
8258 *
8259 * [ WFC: No Recursion ]
8260 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008261 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008262 *
8263 * [ WFC: Entity Declared ]
8264 * In a document without any DTD, a document with only an internal DTD
8265 * subset which contains no parameter entity references, or a document
8266 * with "standalone='yes'", ... ... The declaration of a parameter
8267 * entity must precede any reference to it...
8268 *
8269 * [ VC: Entity Declared ]
8270 * In a document with an external subset or external parameter entities
8271 * with "standalone='no'", ... ... The declaration of a parameter entity
8272 * must precede any reference to it...
8273 *
8274 * [ WFC: In DTD ]
8275 * Parameter-entity references may only appear in the DTD.
8276 * NOTE: misleading but this is handled.
8277 *
8278 * Returns the string of the entity content.
8279 * str is updated to the current value of the index
8280 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008281static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008282xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8283 const xmlChar *ptr;
8284 xmlChar cur;
8285 xmlChar *name;
8286 xmlEntityPtr entity = NULL;
8287
8288 if ((str == NULL) || (*str == NULL)) return(NULL);
8289 ptr = *str;
8290 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008291 if (cur != '%')
8292 return(NULL);
8293 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008294 name = xmlParseStringName(ctxt, &ptr);
8295 if (name == NULL) {
8296 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8297 "xmlParseStringPEReference: no name\n");
8298 *str = ptr;
8299 return(NULL);
8300 }
8301 cur = *ptr;
8302 if (cur != ';') {
8303 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8304 xmlFree(name);
8305 *str = ptr;
8306 return(NULL);
8307 }
8308 ptr++;
8309
8310 /*
8311 * Increate the number of entity references parsed
8312 */
8313 ctxt->nbentities++;
8314
8315 /*
8316 * Request the entity from SAX
8317 */
8318 if ((ctxt->sax != NULL) &&
8319 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008320 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8321 if (ctxt->instate == XML_PARSER_EOF) {
8322 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008323 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008324 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008325 if (entity == NULL) {
8326 /*
8327 * [ WFC: Entity Declared ]
8328 * In a document without any DTD, a document with only an
8329 * internal DTD subset which contains no parameter entity
8330 * references, or a document with "standalone='yes'", ...
8331 * ... The declaration of a parameter entity must precede
8332 * any reference to it...
8333 */
8334 if ((ctxt->standalone == 1) ||
8335 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8336 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8337 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008338 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008339 /*
8340 * [ VC: Entity Declared ]
8341 * In a document with an external subset or external
8342 * parameter entities with "standalone='no'", ...
8343 * ... The declaration of a parameter entity must
8344 * precede any reference to it...
8345 */
8346 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8347 "PEReference: %%%s; not found\n",
8348 name, NULL);
8349 ctxt->valid = 0;
8350 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008351 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008352 } else {
8353 /*
8354 * Internal checking in case the entity quest barfed
8355 */
8356 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8357 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8358 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8359 "%%%s; is not a parameter entity\n",
8360 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008361 }
8362 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008363 ctxt->hasPErefs = 1;
8364 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008365 *str = ptr;
8366 return(entity);
8367}
8368
8369/**
8370 * xmlParseDocTypeDecl:
8371 * @ctxt: an XML parser context
8372 *
8373 * parse a DOCTYPE declaration
8374 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008375 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008376 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8377 *
8378 * [ VC: Root Element Type ]
8379 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008380 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008381 */
8382
8383void
8384xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008385 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008386 xmlChar *ExternalID = NULL;
8387 xmlChar *URI = NULL;
8388
8389 /*
8390 * We know that '<!DOCTYPE' has been detected.
8391 */
8392 SKIP(9);
8393
8394 SKIP_BLANKS;
8395
8396 /*
8397 * Parse the DOCTYPE name.
8398 */
8399 name = xmlParseName(ctxt);
8400 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008401 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8402 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008403 }
8404 ctxt->intSubName = name;
8405
8406 SKIP_BLANKS;
8407
8408 /*
8409 * Check for SystemID and ExternalID
8410 */
8411 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8412
8413 if ((URI != NULL) || (ExternalID != NULL)) {
8414 ctxt->hasExternalSubset = 1;
8415 }
8416 ctxt->extSubURI = URI;
8417 ctxt->extSubSystem = ExternalID;
8418
8419 SKIP_BLANKS;
8420
8421 /*
8422 * Create and update the internal subset.
8423 */
8424 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8425 (!ctxt->disableSAX))
8426 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008427 if (ctxt->instate == XML_PARSER_EOF)
8428 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008429
8430 /*
8431 * Is there any internal subset declarations ?
8432 * they are handled separately in xmlParseInternalSubset()
8433 */
8434 if (RAW == '[')
8435 return;
8436
8437 /*
8438 * We should be at the end of the DOCTYPE declaration.
8439 */
8440 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008441 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008442 }
8443 NEXT;
8444}
8445
8446/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008447 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008448 * @ctxt: an XML parser context
8449 *
8450 * parse the internal subset declaration
8451 *
8452 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8453 */
8454
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008455static void
Owen Taylor3473f882001-02-23 17:55:21 +00008456xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8457 /*
8458 * Is there any DTD definition ?
8459 */
8460 if (RAW == '[') {
8461 ctxt->instate = XML_PARSER_DTD;
8462 NEXT;
8463 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008464 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008465 * PEReferences.
8466 * Subsequence (markupdecl | PEReference | S)*
8467 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008468 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008469 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008470 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008471
8472 SKIP_BLANKS;
8473 xmlParseMarkupDecl(ctxt);
8474 xmlParsePEReference(ctxt);
8475
8476 /*
8477 * Pop-up of finished entities.
8478 */
8479 while ((RAW == 0) && (ctxt->inputNr > 1))
8480 xmlPopInput(ctxt);
8481
8482 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008483 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008484 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008485 break;
8486 }
8487 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008488 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008489 NEXT;
8490 SKIP_BLANKS;
8491 }
8492 }
8493
8494 /*
8495 * We should be at the end of the DOCTYPE declaration.
8496 */
8497 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008498 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008499 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008500 }
8501 NEXT;
8502}
8503
Daniel Veillard81273902003-09-30 00:43:48 +00008504#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008505/**
8506 * xmlParseAttribute:
8507 * @ctxt: an XML parser context
8508 * @value: a xmlChar ** used to store the value of the attribute
8509 *
8510 * parse an attribute
8511 *
8512 * [41] Attribute ::= Name Eq AttValue
8513 *
8514 * [ WFC: No External Entity References ]
8515 * Attribute values cannot contain direct or indirect entity references
8516 * to external entities.
8517 *
8518 * [ WFC: No < in Attribute Values ]
8519 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008520 * an attribute value (other than "&lt;") must not contain a <.
8521 *
Owen Taylor3473f882001-02-23 17:55:21 +00008522 * [ VC: Attribute Value Type ]
8523 * The attribute must have been declared; the value must be of the type
8524 * declared for it.
8525 *
8526 * [25] Eq ::= S? '=' S?
8527 *
8528 * With namespace:
8529 *
8530 * [NS 11] Attribute ::= QName Eq AttValue
8531 *
8532 * Also the case QName == xmlns:??? is handled independently as a namespace
8533 * definition.
8534 *
8535 * Returns the attribute name, and the value in *value.
8536 */
8537
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008538const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008539xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008540 const xmlChar *name;
8541 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008542
8543 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008544 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008545 name = xmlParseName(ctxt);
8546 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008547 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008548 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008549 return(NULL);
8550 }
8551
8552 /*
8553 * read the value
8554 */
8555 SKIP_BLANKS;
8556 if (RAW == '=') {
8557 NEXT;
8558 SKIP_BLANKS;
8559 val = xmlParseAttValue(ctxt);
8560 ctxt->instate = XML_PARSER_CONTENT;
8561 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008562 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008563 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008564 return(NULL);
8565 }
8566
8567 /*
8568 * Check that xml:lang conforms to the specification
8569 * No more registered as an error, just generate a warning now
8570 * since this was deprecated in XML second edition
8571 */
8572 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8573 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008574 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8575 "Malformed value for xml:lang : %s\n",
8576 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008577 }
8578 }
8579
8580 /*
8581 * Check that xml:space conforms to the specification
8582 */
8583 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8584 if (xmlStrEqual(val, BAD_CAST "default"))
8585 *(ctxt->space) = 0;
8586 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8587 *(ctxt->space) = 1;
8588 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008589 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008590"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008591 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008592 }
8593 }
8594
8595 *value = val;
8596 return(name);
8597}
8598
8599/**
8600 * xmlParseStartTag:
8601 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008602 *
Owen Taylor3473f882001-02-23 17:55:21 +00008603 * parse a start of tag either for rule element or
8604 * EmptyElement. In both case we don't parse the tag closing chars.
8605 *
8606 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8607 *
8608 * [ WFC: Unique Att Spec ]
8609 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008610 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008611 *
8612 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8613 *
8614 * [ WFC: Unique Att Spec ]
8615 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008616 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008617 *
8618 * With namespace:
8619 *
8620 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8621 *
8622 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8623 *
8624 * Returns the element name parsed
8625 */
8626
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008627const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008628xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008629 const xmlChar *name;
8630 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008631 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008632 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008633 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008634 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008635 int i;
8636
8637 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008638 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008639
8640 name = xmlParseName(ctxt);
8641 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008642 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008643 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008644 return(NULL);
8645 }
8646
8647 /*
8648 * Now parse the attributes, it ends up with the ending
8649 *
8650 * (S Attribute)* S?
8651 */
8652 SKIP_BLANKS;
8653 GROW;
8654
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008655 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008656 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008657 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008658 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008659 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008660
8661 attname = xmlParseAttribute(ctxt, &attvalue);
8662 if ((attname != NULL) && (attvalue != NULL)) {
8663 /*
8664 * [ WFC: Unique Att Spec ]
8665 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008666 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008667 */
8668 for (i = 0; i < nbatts;i += 2) {
8669 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008670 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008671 xmlFree(attvalue);
8672 goto failed;
8673 }
8674 }
Owen Taylor3473f882001-02-23 17:55:21 +00008675 /*
8676 * Add the pair to atts
8677 */
8678 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008679 maxatts = 22; /* allow for 10 attrs by default */
8680 atts = (const xmlChar **)
8681 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008682 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008683 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008684 if (attvalue != NULL)
8685 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008686 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008687 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008688 ctxt->atts = atts;
8689 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008690 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008691 const xmlChar **n;
8692
Owen Taylor3473f882001-02-23 17:55:21 +00008693 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008694 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008695 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008696 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008697 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008698 if (attvalue != NULL)
8699 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008700 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008701 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008702 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008703 ctxt->atts = atts;
8704 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008705 }
8706 atts[nbatts++] = attname;
8707 atts[nbatts++] = attvalue;
8708 atts[nbatts] = NULL;
8709 atts[nbatts + 1] = NULL;
8710 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008711 if (attvalue != NULL)
8712 xmlFree(attvalue);
8713 }
8714
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008715failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008716
Daniel Veillard3772de32002-12-17 10:31:45 +00008717 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008718 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8719 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008720 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008721 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8722 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008723 }
8724 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008725 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8726 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008727 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8728 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008729 break;
8730 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008731 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008732 GROW;
8733 }
8734
8735 /*
8736 * SAX: Start of Element !
8737 */
8738 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008739 (!ctxt->disableSAX)) {
8740 if (nbatts > 0)
8741 ctxt->sax->startElement(ctxt->userData, name, atts);
8742 else
8743 ctxt->sax->startElement(ctxt->userData, name, NULL);
8744 }
Owen Taylor3473f882001-02-23 17:55:21 +00008745
8746 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008747 /* Free only the content strings */
8748 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008749 if (atts[i] != NULL)
8750 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008751 }
8752 return(name);
8753}
8754
8755/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008757 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008758 * @line: line of the start tag
8759 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008760 *
8761 * parse an end of tag
8762 *
8763 * [42] ETag ::= '</' Name S? '>'
8764 *
8765 * With namespace
8766 *
8767 * [NS 9] ETag ::= '</' QName S? '>'
8768 */
8769
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008770static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008772 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008773
8774 GROW;
8775 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008776 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008777 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008778 return;
8779 }
8780 SKIP(2);
8781
Daniel Veillard46de64e2002-05-29 08:21:33 +00008782 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008783
8784 /*
8785 * We should definitely be at the ending "S? '>'" part
8786 */
8787 GROW;
8788 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008789 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008790 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008791 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008792 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008793
8794 /*
8795 * [ WFC: Element Type Match ]
8796 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008797 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008798 *
8799 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008800 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008801 if (name == NULL) name = BAD_CAST "unparseable";
8802 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008803 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008804 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008805 }
8806
8807 /*
8808 * SAX: End of Tag
8809 */
8810 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8811 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008812 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008813
Daniel Veillarde57ec792003-09-10 10:50:59 +00008814 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008815 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008816 return;
8817}
8818
8819/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008820 * xmlParseEndTag:
8821 * @ctxt: an XML parser context
8822 *
8823 * parse an end of tag
8824 *
8825 * [42] ETag ::= '</' Name S? '>'
8826 *
8827 * With namespace
8828 *
8829 * [NS 9] ETag ::= '</' QName S? '>'
8830 */
8831
8832void
8833xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 xmlParseEndTag1(ctxt, 0);
8835}
Daniel Veillard81273902003-09-30 00:43:48 +00008836#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008837
8838/************************************************************************
8839 * *
8840 * SAX 2 specific operations *
8841 * *
8842 ************************************************************************/
8843
Daniel Veillard0fb18932003-09-07 09:14:37 +00008844/*
8845 * xmlGetNamespace:
8846 * @ctxt: an XML parser context
8847 * @prefix: the prefix to lookup
8848 *
8849 * Lookup the namespace name for the @prefix (which ca be NULL)
Jan Pokornýbb654fe2016-04-13 16:56:07 +02008850 * The prefix must come from the @ctxt->dict dictionary
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851 *
8852 * Returns the namespace name or NULL if not bound
8853 */
8854static const xmlChar *
8855xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8856 int i;
8857
Daniel Veillarde57ec792003-09-10 10:50:59 +00008858 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008860 if (ctxt->nsTab[i] == prefix) {
8861 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8862 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008863 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008864 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008865 return(NULL);
8866}
8867
8868/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008869 * xmlParseQName:
8870 * @ctxt: an XML parser context
8871 * @prefix: pointer to store the prefix part
8872 *
8873 * parse an XML Namespace QName
8874 *
8875 * [6] QName ::= (Prefix ':')? LocalPart
8876 * [7] Prefix ::= NCName
8877 * [8] LocalPart ::= NCName
8878 *
8879 * Returns the Name parsed or NULL
8880 */
8881
8882static const xmlChar *
8883xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8884 const xmlChar *l, *p;
8885
8886 GROW;
8887
8888 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008889 if (l == NULL) {
8890 if (CUR == ':') {
8891 l = xmlParseName(ctxt);
8892 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008893 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008894 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008895 *prefix = NULL;
8896 return(l);
8897 }
8898 }
8899 return(NULL);
8900 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008901 if (CUR == ':') {
8902 NEXT;
8903 p = l;
8904 l = xmlParseNCName(ctxt);
8905 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008906 xmlChar *tmp;
8907
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008908 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8909 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008910 l = xmlParseNmtoken(ctxt);
8911 if (l == NULL)
8912 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8913 else {
8914 tmp = xmlBuildQName(l, p, NULL, 0);
8915 xmlFree((char *)l);
8916 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008917 p = xmlDictLookup(ctxt->dict, tmp, -1);
8918 if (tmp != NULL) xmlFree(tmp);
8919 *prefix = NULL;
8920 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008921 }
8922 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008923 xmlChar *tmp;
8924
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008925 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8926 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008927 NEXT;
8928 tmp = (xmlChar *) xmlParseName(ctxt);
8929 if (tmp != NULL) {
8930 tmp = xmlBuildQName(tmp, l, NULL, 0);
8931 l = xmlDictLookup(ctxt->dict, tmp, -1);
8932 if (tmp != NULL) xmlFree(tmp);
8933 *prefix = p;
8934 return(l);
8935 }
8936 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8937 l = xmlDictLookup(ctxt->dict, tmp, -1);
8938 if (tmp != NULL) xmlFree(tmp);
8939 *prefix = p;
8940 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008941 }
8942 *prefix = p;
8943 } else
8944 *prefix = NULL;
8945 return(l);
8946}
8947
8948/**
8949 * xmlParseQNameAndCompare:
8950 * @ctxt: an XML parser context
8951 * @name: the localname
8952 * @prefix: the prefix, if any.
8953 *
8954 * parse an XML name and compares for match
8955 * (specialized for endtag parsing)
8956 *
8957 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8958 * and the name for mismatch
8959 */
8960
8961static const xmlChar *
8962xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8963 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008964 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008965 const xmlChar *in;
8966 const xmlChar *ret;
8967 const xmlChar *prefix2;
8968
8969 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8970
8971 GROW;
8972 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008973
Daniel Veillard0fb18932003-09-07 09:14:37 +00008974 cmp = prefix;
8975 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008976 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008977 ++cmp;
8978 }
8979 if ((*cmp == 0) && (*in == ':')) {
8980 in++;
8981 cmp = name;
8982 while (*in != 0 && *in == *cmp) {
8983 ++in;
8984 ++cmp;
8985 }
William M. Brack76e95df2003-10-18 16:20:14 +00008986 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008987 /* success */
8988 ctxt->input->cur = in;
8989 return((const xmlChar*) 1);
8990 }
8991 }
8992 /*
8993 * all strings coms from the dictionary, equality can be done directly
8994 */
8995 ret = xmlParseQName (ctxt, &prefix2);
8996 if ((ret == name) && (prefix == prefix2))
8997 return((const xmlChar*) 1);
8998 return ret;
8999}
9000
9001/**
9002 * xmlParseAttValueInternal:
9003 * @ctxt: an XML parser context
9004 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009005 * @alloc: whether the attribute was reallocated as a new string
9006 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00009007 *
9008 * parse a value for an attribute.
9009 * NOTE: if no normalization is needed, the routine will return pointers
9010 * directly from the data buffer.
9011 *
9012 * 3.3.3 Attribute-Value Normalization:
9013 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009014 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009015 * - a character reference is processed by appending the referenced
9016 * character to the attribute value
9017 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009018 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00009019 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9020 * appending #x20 to the normalized value, except that only a single
9021 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009022 * parsed entity or the literal entity value of an internal parsed entity
9023 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00009024 * If the declared value is not CDATA, then the XML processor must further
9025 * process the normalized attribute value by discarding any leading and
9026 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009027 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009028 * All attributes for which no declaration has been read should be treated
9029 * by a non-validating parser as if declared CDATA.
9030 *
9031 * Returns the AttValue parsed or NULL. The value has to be freed by the
9032 * caller if it was copied, this can be detected by val[*len] == 0.
9033 */
9034
9035static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009036xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9037 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009038{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009039 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009040 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009041 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08009042 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009043
9044 GROW;
9045 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08009046 line = ctxt->input->line;
9047 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009048 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009049 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009050 return (NULL);
9051 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009052 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009053
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009054 /*
9055 * try to handle in this routine the most common case where no
9056 * allocation of a new string is required and where content is
9057 * pure ASCII.
9058 */
9059 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009060 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009061 end = ctxt->input->end;
9062 start = in;
9063 if (in >= end) {
9064 const xmlChar *oldbase = ctxt->input->base;
9065 GROW;
9066 if (oldbase != ctxt->input->base) {
9067 long delta = ctxt->input->base - oldbase;
9068 start = start + delta;
9069 in = in + delta;
9070 }
9071 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009072 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009073 if (normalize) {
9074 /*
9075 * Skip any leading spaces
9076 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009077 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009078 ((*in == 0x20) || (*in == 0x9) ||
9079 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009080 if (*in == 0xA) {
9081 line++; col = 1;
9082 } else {
9083 col++;
9084 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009085 in++;
9086 start = in;
9087 if (in >= end) {
9088 const xmlChar *oldbase = ctxt->input->base;
9089 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009090 if (ctxt->instate == XML_PARSER_EOF)
9091 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009092 if (oldbase != ctxt->input->base) {
9093 long delta = ctxt->input->base - oldbase;
9094 start = start + delta;
9095 in = in + delta;
9096 }
9097 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009098 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9099 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9100 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009101 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009102 return(NULL);
9103 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009104 }
9105 }
9106 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9107 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009108 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009109 if ((*in++ == 0x20) && (*in == 0x20)) break;
9110 if (in >= end) {
9111 const xmlChar *oldbase = ctxt->input->base;
9112 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009113 if (ctxt->instate == XML_PARSER_EOF)
9114 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009115 if (oldbase != ctxt->input->base) {
9116 long delta = ctxt->input->base - oldbase;
9117 start = start + delta;
9118 in = in + delta;
9119 }
9120 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009121 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9122 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9123 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009124 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009125 return(NULL);
9126 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009127 }
9128 }
9129 last = in;
9130 /*
9131 * skip the trailing blanks
9132 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009133 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009134 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009135 ((*in == 0x20) || (*in == 0x9) ||
9136 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009137 if (*in == 0xA) {
9138 line++, col = 1;
9139 } else {
9140 col++;
9141 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009142 in++;
9143 if (in >= end) {
9144 const xmlChar *oldbase = ctxt->input->base;
9145 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009146 if (ctxt->instate == XML_PARSER_EOF)
9147 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009148 if (oldbase != ctxt->input->base) {
9149 long delta = ctxt->input->base - oldbase;
9150 start = start + delta;
9151 in = in + delta;
9152 last = last + delta;
9153 }
9154 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009155 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9156 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9157 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009158 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009159 return(NULL);
9160 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009161 }
9162 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009163 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9164 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9165 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009166 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009167 return(NULL);
9168 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009169 if (*in != limit) goto need_complex;
9170 } else {
9171 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9172 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9173 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009174 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009175 if (in >= end) {
9176 const xmlChar *oldbase = ctxt->input->base;
9177 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009178 if (ctxt->instate == XML_PARSER_EOF)
9179 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009180 if (oldbase != ctxt->input->base) {
9181 long delta = ctxt->input->base - oldbase;
9182 start = start + delta;
9183 in = in + delta;
9184 }
9185 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009186 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9187 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9188 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009189 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009190 return(NULL);
9191 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009192 }
9193 }
9194 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009195 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9196 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9197 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009198 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009199 return(NULL);
9200 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009201 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009202 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009203 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009204 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009205 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009206 *len = last - start;
9207 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009208 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009209 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009210 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009211 }
9212 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009213 ctxt->input->line = line;
9214 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009215 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009216 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009217need_complex:
9218 if (alloc) *alloc = 1;
9219 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009220}
9221
9222/**
9223 * xmlParseAttribute2:
9224 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009225 * @pref: the element prefix
9226 * @elem: the element name
9227 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009228 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009229 * @len: an int * to save the length of the attribute
9230 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009231 *
9232 * parse an attribute in the new SAX2 framework.
9233 *
9234 * Returns the attribute name, and the value in *value, .
9235 */
9236
9237static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009238xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009239 const xmlChar * pref, const xmlChar * elem,
9240 const xmlChar ** prefix, xmlChar ** value,
9241 int *len, int *alloc)
9242{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009243 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009244 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009245 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009246
9247 *value = NULL;
9248 GROW;
9249 name = xmlParseQName(ctxt, prefix);
9250 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009251 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9252 "error parsing attribute name\n");
9253 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009254 }
9255
9256 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009257 * get the type if needed
9258 */
9259 if (ctxt->attsSpecial != NULL) {
9260 int type;
9261
9262 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009263 pref, elem, *prefix, name);
9264 if (type != 0)
9265 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009266 }
9267
9268 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009269 * read the value
9270 */
9271 SKIP_BLANKS;
9272 if (RAW == '=') {
9273 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009274 SKIP_BLANKS;
9275 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9276 if (normalize) {
9277 /*
9278 * Sometimes a second normalisation pass for spaces is needed
9279 * but that only happens if charrefs or entities refernces
9280 * have been used in the attribute value, i.e. the attribute
9281 * value have been extracted in an allocated string already.
9282 */
9283 if (*alloc) {
9284 const xmlChar *val2;
9285
9286 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009287 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009288 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009289 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009290 }
9291 }
9292 }
9293 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009294 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009295 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9296 "Specification mandate value for attribute %s\n",
9297 name);
9298 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009299 }
9300
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009301 if (*prefix == ctxt->str_xml) {
9302 /*
9303 * Check that xml:lang conforms to the specification
9304 * No more registered as an error, just generate a warning now
9305 * since this was deprecated in XML second edition
9306 */
9307 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9308 internal_val = xmlStrndup(val, *len);
9309 if (!xmlCheckLanguageID(internal_val)) {
9310 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9311 "Malformed value for xml:lang : %s\n",
9312 internal_val, NULL);
9313 }
9314 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009315
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009316 /*
9317 * Check that xml:space conforms to the specification
9318 */
9319 if (xmlStrEqual(name, BAD_CAST "space")) {
9320 internal_val = xmlStrndup(val, *len);
9321 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9322 *(ctxt->space) = 0;
9323 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9324 *(ctxt->space) = 1;
9325 else {
9326 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9327 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9328 internal_val, NULL);
9329 }
9330 }
9331 if (internal_val) {
9332 xmlFree(internal_val);
9333 }
9334 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009335
9336 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009337 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009338}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009339/**
9340 * xmlParseStartTag2:
9341 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009342 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009343 * parse a start of tag either for rule element or
9344 * EmptyElement. In both case we don't parse the tag closing chars.
9345 * This routine is called when running SAX2 parsing
9346 *
9347 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9348 *
9349 * [ WFC: Unique Att Spec ]
9350 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009351 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009352 *
9353 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9354 *
9355 * [ WFC: Unique Att Spec ]
9356 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009357 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009358 *
9359 * With namespace:
9360 *
9361 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9362 *
9363 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9364 *
9365 * Returns the element name parsed
9366 */
9367
9368static const xmlChar *
9369xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009370 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009371 const xmlChar *localname;
9372 const xmlChar *prefix;
9373 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009374 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009375 const xmlChar *nsname;
9376 xmlChar *attvalue;
9377 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009378 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009379 int nratts, nbatts, nbdef;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009380 int i, j, nbNs, attval, oldline, oldcol, inputNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009381 const xmlChar *base;
9382 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009383 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009384
9385 if (RAW != '<') return(NULL);
9386 NEXT1;
9387
9388 /*
9389 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9390 * point since the attribute values may be stored as pointers to
9391 * the buffer and calling SHRINK would destroy them !
9392 * The Shrinking is only possible once the full set of attribute
9393 * callbacks have been done.
9394 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009395reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009396 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009397 base = ctxt->input->base;
9398 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009399 inputNr = ctxt->inputNr;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009400 oldline = ctxt->input->line;
9401 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009402 nbatts = 0;
9403 nratts = 0;
9404 nbdef = 0;
9405 nbNs = 0;
9406 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009407 /* Forget any namespaces added during an earlier parse of this element. */
9408 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009409
9410 localname = xmlParseQName(ctxt, &prefix);
9411 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009412 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9413 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009414 return(NULL);
9415 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009416 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009417
9418 /*
9419 * Now parse the attributes, it ends up with the ending
9420 *
9421 * (S Attribute)* S?
9422 */
9423 SKIP_BLANKS;
9424 GROW;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009425 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9426 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009427
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009428 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009429 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009430 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009431 const xmlChar *q = CUR_PTR;
9432 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009433 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009434
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009435 attname = xmlParseAttribute2(ctxt, prefix, localname,
9436 &aprefix, &attvalue, &len, &alloc);
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009437 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
Daniel Veillarddcec6722006-10-15 20:32:53 +00009438 if ((attvalue != NULL) && (alloc != 0))
9439 xmlFree(attvalue);
9440 attvalue = NULL;
9441 goto base_changed;
9442 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009443 if ((attname != NULL) && (attvalue != NULL)) {
9444 if (len < 0) len = xmlStrlen(attvalue);
9445 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009446 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9447 xmlURIPtr uri;
9448
Daniel Veillardc836ba62014-07-14 16:39:50 +08009449 if (URL == NULL) {
9450 xmlErrMemory(ctxt, "dictionary allocation failure");
9451 if ((attvalue != NULL) && (alloc != 0))
9452 xmlFree(attvalue);
9453 return(NULL);
9454 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009455 if (*URL != 0) {
9456 uri = xmlParseURI((const char *) URL);
9457 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009458 xmlNsErr(ctxt, XML_WAR_NS_URI,
9459 "xmlns: '%s' is not a valid URI\n",
9460 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009461 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009462 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009463 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9464 "xmlns: URI %s is not absolute\n",
9465 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009466 }
9467 xmlFreeURI(uri);
9468 }
Daniel Veillard37334572008-07-31 08:20:02 +00009469 if (URL == ctxt->str_xml_ns) {
9470 if (attname != ctxt->str_xml) {
9471 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9472 "xml namespace URI cannot be the default namespace\n",
9473 NULL, NULL, NULL);
9474 }
9475 goto skip_default_ns;
9476 }
9477 if ((len == 29) &&
9478 (xmlStrEqual(URL,
9479 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9480 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9481 "reuse of the xmlns namespace name is forbidden\n",
9482 NULL, NULL, NULL);
9483 goto skip_default_ns;
9484 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009485 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009486 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009487 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009488 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009489 for (j = 1;j <= nbNs;j++)
9490 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9491 break;
9492 if (j <= nbNs)
9493 xmlErrAttributeDup(ctxt, NULL, attname);
9494 else
9495 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009496skip_default_ns:
Pranjal Jumde38eae572016-03-07 14:04:08 -08009497 if ((attvalue != NULL) && (alloc != 0)) {
9498 xmlFree(attvalue);
9499 attvalue = NULL;
9500 }
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009501 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9502 break;
9503 if (!IS_BLANK_CH(RAW)) {
9504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9505 "attributes construct error\n");
9506 break;
9507 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009508 SKIP_BLANKS;
Pranjal Jumde38eae572016-03-07 14:04:08 -08009509 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9510 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009511 continue;
9512 }
9513 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009514 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9515 xmlURIPtr uri;
9516
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009517 if (attname == ctxt->str_xml) {
9518 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009519 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9520 "xml namespace prefix mapped to wrong URI\n",
9521 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009522 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009523 /*
9524 * Do not keep a namespace definition node
9525 */
Daniel Veillard37334572008-07-31 08:20:02 +00009526 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009527 }
Daniel Veillard37334572008-07-31 08:20:02 +00009528 if (URL == ctxt->str_xml_ns) {
9529 if (attname != ctxt->str_xml) {
9530 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9531 "xml namespace URI mapped to wrong prefix\n",
9532 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009533 }
Daniel Veillard37334572008-07-31 08:20:02 +00009534 goto skip_ns;
9535 }
9536 if (attname == ctxt->str_xmlns) {
9537 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9538 "redefinition of the xmlns prefix is forbidden\n",
9539 NULL, NULL, NULL);
9540 goto skip_ns;
9541 }
9542 if ((len == 29) &&
9543 (xmlStrEqual(URL,
9544 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9545 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9546 "reuse of the xmlns namespace name is forbidden\n",
9547 NULL, NULL, NULL);
9548 goto skip_ns;
9549 }
9550 if ((URL == NULL) || (URL[0] == 0)) {
9551 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9552 "xmlns:%s: Empty XML namespace is not allowed\n",
9553 attname, NULL, NULL);
9554 goto skip_ns;
9555 } else {
9556 uri = xmlParseURI((const char *) URL);
9557 if (uri == NULL) {
9558 xmlNsErr(ctxt, XML_WAR_NS_URI,
9559 "xmlns:%s: '%s' is not a valid URI\n",
9560 attname, URL, NULL);
9561 } else {
9562 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9563 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9564 "xmlns:%s: URI %s is not absolute\n",
9565 attname, URL, NULL);
9566 }
9567 xmlFreeURI(uri);
9568 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009569 }
9570
Daniel Veillard0fb18932003-09-07 09:14:37 +00009571 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009572 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009573 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009574 for (j = 1;j <= nbNs;j++)
9575 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9576 break;
9577 if (j <= nbNs)
9578 xmlErrAttributeDup(ctxt, aprefix, attname);
9579 else
9580 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009581skip_ns:
Pranjal Jumde38eae572016-03-07 14:04:08 -08009582 if ((attvalue != NULL) && (alloc != 0)) {
9583 xmlFree(attvalue);
9584 attvalue = NULL;
9585 }
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009586 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9587 break;
9588 if (!IS_BLANK_CH(RAW)) {
9589 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9590 "attributes construct error\n");
9591 break;
9592 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009593 SKIP_BLANKS;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009594 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9595 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009596 continue;
9597 }
9598
9599 /*
9600 * Add the pair to atts
9601 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009602 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9603 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009604 if (attvalue[len] == 0)
9605 xmlFree(attvalue);
9606 goto failed;
9607 }
9608 maxatts = ctxt->maxatts;
9609 atts = ctxt->atts;
9610 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009611 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009612 atts[nbatts++] = attname;
9613 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009614 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009615 atts[nbatts++] = attvalue;
9616 attvalue += len;
9617 atts[nbatts++] = attvalue;
9618 /*
9619 * tag if some deallocation is needed
9620 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009621 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009622 } else {
9623 if ((attvalue != NULL) && (attvalue[len] == 0))
9624 xmlFree(attvalue);
9625 }
9626
Daniel Veillard37334572008-07-31 08:20:02 +00009627failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009628
9629 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009630 if (ctxt->instate == XML_PARSER_EOF)
9631 break;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009632 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9633 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009634 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9635 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009636 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009637 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9638 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009639 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009640 }
9641 SKIP_BLANKS;
9642 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9643 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009644 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009645 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009646 break;
9647 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009648 GROW;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009649 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9650 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009651 }
9652
Daniel Veillard0fb18932003-09-07 09:14:37 +00009653 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009654 * The attributes defaulting
9655 */
9656 if (ctxt->attsDefault != NULL) {
9657 xmlDefAttrsPtr defaults;
9658
9659 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9660 if (defaults != NULL) {
9661 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009662 attname = defaults->values[5 * i];
9663 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009664
9665 /*
9666 * special work for namespaces defaulted defs
9667 */
9668 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9669 /*
9670 * check that it's not a defined namespace
9671 */
9672 for (j = 1;j <= nbNs;j++)
9673 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9674 break;
9675 if (j <= nbNs) continue;
9676
9677 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009678 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009679 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009680 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009681 nbNs++;
9682 }
9683 } else if (aprefix == ctxt->str_xmlns) {
9684 /*
9685 * check that it's not a defined namespace
9686 */
9687 for (j = 1;j <= nbNs;j++)
9688 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9689 break;
9690 if (j <= nbNs) continue;
9691
9692 nsname = xmlGetNamespace(ctxt, attname);
9693 if (nsname != defaults->values[2]) {
9694 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009695 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009696 nbNs++;
9697 }
9698 } else {
9699 /*
9700 * check that it's not a defined attribute
9701 */
9702 for (j = 0;j < nbatts;j+=5) {
9703 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9704 break;
9705 }
9706 if (j < nbatts) continue;
9707
9708 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9709 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009710 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009711 }
9712 maxatts = ctxt->maxatts;
9713 atts = ctxt->atts;
9714 }
9715 atts[nbatts++] = attname;
9716 atts[nbatts++] = aprefix;
9717 if (aprefix == NULL)
9718 atts[nbatts++] = NULL;
9719 else
9720 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009721 atts[nbatts++] = defaults->values[5 * i + 2];
9722 atts[nbatts++] = defaults->values[5 * i + 3];
9723 if ((ctxt->standalone == 1) &&
9724 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009725 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009726 "standalone: attribute %s on %s defaulted from external subset\n",
9727 attname, localname);
9728 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009729 nbdef++;
9730 }
9731 }
9732 }
9733 }
9734
Daniel Veillarde70c8772003-11-25 07:21:18 +00009735 /*
9736 * The attributes checkings
9737 */
9738 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009739 /*
9740 * The default namespace does not apply to attribute names.
9741 */
9742 if (atts[i + 1] != NULL) {
9743 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9744 if (nsname == NULL) {
9745 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9746 "Namespace prefix %s for %s on %s is not defined\n",
9747 atts[i + 1], atts[i], localname);
9748 }
9749 atts[i + 2] = nsname;
9750 } else
9751 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009752 /*
9753 * [ WFC: Unique Att Spec ]
9754 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009755 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009756 * As extended by the Namespace in XML REC.
9757 */
9758 for (j = 0; j < i;j += 5) {
9759 if (atts[i] == atts[j]) {
9760 if (atts[i+1] == atts[j+1]) {
9761 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9762 break;
9763 }
9764 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9765 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9766 "Namespaced Attribute %s in '%s' redefined\n",
9767 atts[i], nsname, NULL);
9768 break;
9769 }
9770 }
9771 }
9772 }
9773
Daniel Veillarde57ec792003-09-10 10:50:59 +00009774 nsname = xmlGetNamespace(ctxt, prefix);
9775 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009776 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9777 "Namespace prefix %s on %s is not defined\n",
9778 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009779 }
9780 *pref = prefix;
9781 *URI = nsname;
9782
9783 /*
9784 * SAX: Start of Element !
9785 */
9786 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9787 (!ctxt->disableSAX)) {
9788 if (nbNs > 0)
9789 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9790 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9791 nbatts / 5, nbdef, atts);
9792 else
9793 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9794 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9795 }
9796
9797 /*
9798 * Free up attribute allocated strings if needed
9799 */
9800 if (attval != 0) {
9801 for (i = 3,j = 0; j < nratts;i += 5,j++)
9802 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9803 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009804 }
9805
9806 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009807
9808base_changed:
9809 /*
9810 * the attribute strings are valid iif the base didn't changed
9811 */
9812 if (attval != 0) {
9813 for (i = 3,j = 0; j < nratts;i += 5,j++)
9814 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9815 xmlFree((xmlChar *) atts[i]);
9816 }
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009817
9818 /*
9819 * We can't switch from one entity to another in the middle
9820 * of a start tag
9821 */
9822 if (inputNr != ctxt->inputNr) {
9823 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9824 "Start tag doesn't start and stop in the same entity\n");
9825 return(NULL);
9826 }
9827
Daniel Veillarde57ec792003-09-10 10:50:59 +00009828 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009829 ctxt->input->line = oldline;
9830 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009831 if (ctxt->wellFormed == 1) {
9832 goto reparse;
9833 }
9834 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009835}
9836
9837/**
9838 * xmlParseEndTag2:
9839 * @ctxt: an XML parser context
9840 * @line: line of the start tag
9841 * @nsNr: number of namespaces on the start tag
9842 *
9843 * parse an end of tag
9844 *
9845 * [42] ETag ::= '</' Name S? '>'
9846 *
9847 * With namespace
9848 *
9849 * [NS 9] ETag ::= '</' QName S? '>'
9850 */
9851
9852static void
9853xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009854 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009855 const xmlChar *name;
David Kilzerdb07dd62016-02-12 09:58:29 -08009856 size_t curLength;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009857
9858 GROW;
9859 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009860 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009861 return;
9862 }
9863 SKIP(2);
9864
David Kilzerdb07dd62016-02-12 09:58:29 -08009865 curLength = ctxt->input->end - ctxt->input->cur;
9866 if ((tlen > 0) && (curLength >= (size_t)tlen) &&
9867 (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
9868 if ((curLength >= (size_t)(tlen + 1)) &&
9869 (ctxt->input->cur[tlen] == '>')) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009870 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009871 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009872 goto done;
9873 }
9874 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009875 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009876 name = (xmlChar*)1;
9877 } else {
9878 if (prefix == NULL)
9879 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9880 else
9881 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9882 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009883
9884 /*
9885 * We should definitely be at the ending "S? '>'" part
9886 */
9887 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009888 if (ctxt->instate == XML_PARSER_EOF)
9889 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009890 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009891 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009892 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009893 } else
9894 NEXT1;
9895
9896 /*
9897 * [ WFC: Element Type Match ]
9898 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009899 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009900 *
9901 */
9902 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009903 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009904 if ((line == 0) && (ctxt->node != NULL))
9905 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009906 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009907 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009908 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009909 }
9910
9911 /*
9912 * SAX: End of Tag
9913 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009914done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009915 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9916 (!ctxt->disableSAX))
9917 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9918
Daniel Veillard0fb18932003-09-07 09:14:37 +00009919 spacePop(ctxt);
9920 if (nsNr != 0)
9921 nsPop(ctxt, nsNr);
9922 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009923}
9924
9925/**
Owen Taylor3473f882001-02-23 17:55:21 +00009926 * xmlParseCDSect:
9927 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009928 *
Owen Taylor3473f882001-02-23 17:55:21 +00009929 * Parse escaped pure raw content.
9930 *
9931 * [18] CDSect ::= CDStart CData CDEnd
9932 *
9933 * [19] CDStart ::= '<![CDATA['
9934 *
9935 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9936 *
9937 * [21] CDEnd ::= ']]>'
9938 */
9939void
9940xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9941 xmlChar *buf = NULL;
9942 int len = 0;
9943 int size = XML_PARSER_BUFFER_SIZE;
9944 int r, rl;
9945 int s, sl;
9946 int cur, l;
9947 int count = 0;
9948
Daniel Veillard8f597c32003-10-06 08:19:27 +00009949 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009950 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009951 SKIP(9);
9952 } else
9953 return;
9954
9955 ctxt->instate = XML_PARSER_CDATA_SECTION;
9956 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009957 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009958 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009959 ctxt->instate = XML_PARSER_CONTENT;
9960 return;
9961 }
9962 NEXTL(rl);
9963 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009964 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009965 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009966 ctxt->instate = XML_PARSER_CONTENT;
9967 return;
9968 }
9969 NEXTL(sl);
9970 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009971 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009972 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009973 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009974 return;
9975 }
William M. Brack871611b2003-10-18 04:53:14 +00009976 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009977 ((r != ']') || (s != ']') || (cur != '>'))) {
9978 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009979 xmlChar *tmp;
9980
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009981 if ((size > XML_MAX_TEXT_LENGTH) &&
9982 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9983 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9984 "CData section too big found", NULL);
9985 xmlFree (buf);
9986 return;
9987 }
9988 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009989 if (tmp == NULL) {
9990 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009991 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009992 return;
9993 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009994 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009995 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009996 }
9997 COPY_BUF(rl,buf,len,r);
9998 r = s;
9999 rl = sl;
10000 s = cur;
10001 sl = l;
10002 count++;
10003 if (count > 50) {
10004 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010005 if (ctxt->instate == XML_PARSER_EOF) {
10006 xmlFree(buf);
10007 return;
10008 }
Owen Taylor3473f882001-02-23 17:55:21 +000010009 count = 0;
10010 }
10011 NEXTL(l);
10012 cur = CUR_CHAR(l);
10013 }
10014 buf[len] = 0;
10015 ctxt->instate = XML_PARSER_CONTENT;
10016 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010017 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +000010018 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010019 xmlFree(buf);
10020 return;
10021 }
10022 NEXTL(l);
10023
10024 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010025 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +000010026 */
10027 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10028 if (ctxt->sax->cdataBlock != NULL)
10029 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +000010030 else if (ctxt->sax->characters != NULL)
10031 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +000010032 }
10033 xmlFree(buf);
10034}
10035
10036/**
10037 * xmlParseContent:
10038 * @ctxt: an XML parser context
10039 *
10040 * Parse a content:
10041 *
10042 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10043 */
10044
10045void
10046xmlParseContent(xmlParserCtxtPtr ctxt) {
10047 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +000010048 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010049 ((RAW != '<') || (NXT(1) != '/')) &&
10050 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010051 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +000010052 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +000010053 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010054
10055 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010056 * First case : a Processing Instruction.
10057 */
Daniel Veillardfdc91562002-07-01 21:52:03 +000010058 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010059 xmlParsePI(ctxt);
10060 }
10061
10062 /*
10063 * Second case : a CDSection
10064 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010065 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010066 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010067 xmlParseCDSect(ctxt);
10068 }
10069
10070 /*
10071 * Third case : a comment
10072 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010073 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010074 (NXT(2) == '-') && (NXT(3) == '-')) {
10075 xmlParseComment(ctxt);
10076 ctxt->instate = XML_PARSER_CONTENT;
10077 }
10078
10079 /*
10080 * Fourth case : a sub-element.
10081 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010082 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +000010083 xmlParseElement(ctxt);
10084 }
10085
10086 /*
10087 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010088 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +000010089 */
10090
Daniel Veillard21a0f912001-02-25 19:54:14 +000010091 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +000010092 xmlParseReference(ctxt);
10093 }
10094
10095 /*
10096 * Last case, text. Note that References are handled directly.
10097 */
10098 else {
10099 xmlParseCharData(ctxt, 0);
10100 }
10101
10102 GROW;
10103 /*
10104 * Pop-up of finished entities.
10105 */
Daniel Veillard561b7f82002-03-20 21:55:57 +000010106 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +000010107 xmlPopInput(ctxt);
10108 SHRINK;
10109
Daniel Veillardfdc91562002-07-01 21:52:03 +000010110 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010111 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10112 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080010113 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010114 break;
10115 }
10116 }
10117}
10118
10119/**
10120 * xmlParseElement:
10121 * @ctxt: an XML parser context
10122 *
10123 * parse an XML element, this is highly recursive
10124 *
10125 * [39] element ::= EmptyElemTag | STag content ETag
10126 *
10127 * [ WFC: Element Type Match ]
10128 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010129 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +000010130 *
Owen Taylor3473f882001-02-23 17:55:21 +000010131 */
10132
10133void
10134xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010135 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010136 const xmlChar *prefix = NULL;
10137 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010138 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010139 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010140 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010141 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010142
Daniel Veillard8915c152008-08-26 13:05:34 +000010143 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10144 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10145 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10146 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10147 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010148 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010149 return;
10150 }
10151
Owen Taylor3473f882001-02-23 17:55:21 +000010152 /* Capture start position */
10153 if (ctxt->record_info) {
10154 node_info.begin_pos = ctxt->input->consumed +
10155 (CUR_PTR - ctxt->input->base);
10156 node_info.begin_line = ctxt->input->line;
10157 }
10158
10159 if (ctxt->spaceNr == 0)
10160 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010161 else if (*ctxt->space == -2)
10162 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010163 else
10164 spacePush(ctxt, *ctxt->space);
10165
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010166 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010167#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010168 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010169#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010170 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010171#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010172 else
10173 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010174#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010175 if (ctxt->instate == XML_PARSER_EOF)
10176 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010177 if (name == NULL) {
10178 spacePop(ctxt);
10179 return;
10180 }
10181 namePush(ctxt, name);
10182 ret = ctxt->node;
10183
Daniel Veillard4432df22003-09-28 18:58:27 +000010184#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010185 /*
10186 * [ VC: Root Element Type ]
10187 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010188 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010189 */
10190 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10191 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10192 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010193#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010194
10195 /*
10196 * Check for an Empty Element.
10197 */
10198 if ((RAW == '/') && (NXT(1) == '>')) {
10199 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010200 if (ctxt->sax2) {
10201 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10202 (!ctxt->disableSAX))
10203 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010204#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010205 } else {
10206 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10207 (!ctxt->disableSAX))
10208 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010209#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010210 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010211 namePop(ctxt);
10212 spacePop(ctxt);
10213 if (nsNr != ctxt->nsNr)
10214 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010215 if ( ret != NULL && ctxt->record_info ) {
10216 node_info.end_pos = ctxt->input->consumed +
10217 (CUR_PTR - ctxt->input->base);
10218 node_info.end_line = ctxt->input->line;
10219 node_info.node = ret;
10220 xmlParserAddNodeInfo(ctxt, &node_info);
10221 }
10222 return;
10223 }
10224 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010225 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010226 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010227 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10228 "Couldn't find end of Start Tag %s line %d\n",
10229 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010230
10231 /*
10232 * end of parsing of this node.
10233 */
10234 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010235 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010236 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010237 if (nsNr != ctxt->nsNr)
10238 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010239
10240 /*
10241 * Capture end position and add node
10242 */
10243 if ( ret != NULL && ctxt->record_info ) {
10244 node_info.end_pos = ctxt->input->consumed +
10245 (CUR_PTR - ctxt->input->base);
10246 node_info.end_line = ctxt->input->line;
10247 node_info.node = ret;
10248 xmlParserAddNodeInfo(ctxt, &node_info);
10249 }
10250 return;
10251 }
10252
10253 /*
10254 * Parse the content of the element:
10255 */
10256 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010257 if (ctxt->instate == XML_PARSER_EOF)
10258 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010259 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010260 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010261 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010262 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010263
10264 /*
10265 * end of parsing of this node.
10266 */
10267 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010268 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010269 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010270 if (nsNr != ctxt->nsNr)
10271 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010272 return;
10273 }
10274
10275 /*
10276 * parse the end of tag: '</' should be here.
10277 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010278 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010279 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010280 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010281 }
10282#ifdef LIBXML_SAX1_ENABLED
10283 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010284 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010285#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010286
10287 /*
10288 * Capture end position and add node
10289 */
10290 if ( ret != NULL && ctxt->record_info ) {
10291 node_info.end_pos = ctxt->input->consumed +
10292 (CUR_PTR - ctxt->input->base);
10293 node_info.end_line = ctxt->input->line;
10294 node_info.node = ret;
10295 xmlParserAddNodeInfo(ctxt, &node_info);
10296 }
10297}
10298
10299/**
10300 * xmlParseVersionNum:
10301 * @ctxt: an XML parser context
10302 *
10303 * parse the XML version value.
10304 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010305 * [26] VersionNum ::= '1.' [0-9]+
10306 *
10307 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010308 *
10309 * Returns the string giving the XML version number, or NULL
10310 */
10311xmlChar *
10312xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10313 xmlChar *buf = NULL;
10314 int len = 0;
10315 int size = 10;
10316 xmlChar cur;
10317
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010318 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010319 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010320 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010321 return(NULL);
10322 }
10323 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010324 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010325 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010326 return(NULL);
10327 }
10328 buf[len++] = cur;
10329 NEXT;
10330 cur=CUR;
10331 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010332 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010333 return(NULL);
10334 }
10335 buf[len++] = cur;
10336 NEXT;
10337 cur=CUR;
10338 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010339 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010340 xmlChar *tmp;
10341
Owen Taylor3473f882001-02-23 17:55:21 +000010342 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010343 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10344 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010345 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010346 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010347 return(NULL);
10348 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010349 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010350 }
10351 buf[len++] = cur;
10352 NEXT;
10353 cur=CUR;
10354 }
10355 buf[len] = 0;
10356 return(buf);
10357}
10358
10359/**
10360 * xmlParseVersionInfo:
10361 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010362 *
Owen Taylor3473f882001-02-23 17:55:21 +000010363 * parse the XML version.
10364 *
10365 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010366 *
Owen Taylor3473f882001-02-23 17:55:21 +000010367 * [25] Eq ::= S? '=' S?
10368 *
10369 * Returns the version string, e.g. "1.0"
10370 */
10371
10372xmlChar *
10373xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10374 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010375
Daniel Veillarda07050d2003-10-19 14:46:32 +000010376 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010377 SKIP(7);
10378 SKIP_BLANKS;
10379 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010380 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010381 return(NULL);
10382 }
10383 NEXT;
10384 SKIP_BLANKS;
10385 if (RAW == '"') {
10386 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010387 version = xmlParseVersionNum(ctxt);
10388 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010389 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010390 } else
10391 NEXT;
10392 } else if (RAW == '\''){
10393 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010394 version = xmlParseVersionNum(ctxt);
10395 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010396 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010397 } else
10398 NEXT;
10399 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010400 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010401 }
10402 }
10403 return(version);
10404}
10405
10406/**
10407 * xmlParseEncName:
10408 * @ctxt: an XML parser context
10409 *
10410 * parse the XML encoding name
10411 *
10412 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10413 *
10414 * Returns the encoding name value or NULL
10415 */
10416xmlChar *
10417xmlParseEncName(xmlParserCtxtPtr ctxt) {
10418 xmlChar *buf = NULL;
10419 int len = 0;
10420 int size = 10;
10421 xmlChar cur;
10422
10423 cur = CUR;
10424 if (((cur >= 'a') && (cur <= 'z')) ||
10425 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010426 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010427 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010428 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010429 return(NULL);
10430 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010431
Owen Taylor3473f882001-02-23 17:55:21 +000010432 buf[len++] = cur;
10433 NEXT;
10434 cur = CUR;
10435 while (((cur >= 'a') && (cur <= 'z')) ||
10436 ((cur >= 'A') && (cur <= 'Z')) ||
10437 ((cur >= '0') && (cur <= '9')) ||
10438 (cur == '.') || (cur == '_') ||
10439 (cur == '-')) {
10440 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010441 xmlChar *tmp;
10442
Owen Taylor3473f882001-02-23 17:55:21 +000010443 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010444 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10445 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010446 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010447 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010448 return(NULL);
10449 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010450 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010451 }
10452 buf[len++] = cur;
10453 NEXT;
10454 cur = CUR;
10455 if (cur == 0) {
10456 SHRINK;
10457 GROW;
10458 cur = CUR;
10459 }
10460 }
10461 buf[len] = 0;
10462 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010463 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010464 }
10465 return(buf);
10466}
10467
10468/**
10469 * xmlParseEncodingDecl:
10470 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010471 *
Owen Taylor3473f882001-02-23 17:55:21 +000010472 * parse the XML encoding declaration
10473 *
10474 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10475 *
10476 * this setups the conversion filters.
10477 *
10478 * Returns the encoding value or NULL
10479 */
10480
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010481const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010482xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10483 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010484
10485 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010486 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010487 SKIP(8);
10488 SKIP_BLANKS;
10489 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010490 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010491 return(NULL);
10492 }
10493 NEXT;
10494 SKIP_BLANKS;
10495 if (RAW == '"') {
10496 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010497 encoding = xmlParseEncName(ctxt);
10498 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010499 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010500 xmlFree((xmlChar *) encoding);
10501 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010502 } else
10503 NEXT;
10504 } else if (RAW == '\''){
10505 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010506 encoding = xmlParseEncName(ctxt);
10507 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010508 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010509 xmlFree((xmlChar *) encoding);
10510 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010511 } else
10512 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010513 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010514 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010515 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010516
10517 /*
10518 * Non standard parsing, allowing the user to ignore encoding
10519 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010520 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10521 xmlFree((xmlChar *) encoding);
10522 return(NULL);
10523 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010524
Daniel Veillard6b621b82003-08-11 15:03:34 +000010525 /*
10526 * UTF-16 encoding stwich has already taken place at this stage,
10527 * more over the little-endian/big-endian selection is already done
10528 */
10529 if ((encoding != NULL) &&
10530 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10531 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010532 /*
10533 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010534 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010535 * document is apparently UTF-8 compatible, then raise an
10536 * encoding mismatch fatal error
10537 */
10538 if ((ctxt->encoding == NULL) &&
10539 (ctxt->input->buf != NULL) &&
10540 (ctxt->input->buf->encoder == NULL)) {
10541 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10542 "Document labelled UTF-16 but has UTF-8 content\n");
10543 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010544 if (ctxt->encoding != NULL)
10545 xmlFree((xmlChar *) ctxt->encoding);
10546 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010547 }
10548 /*
10549 * UTF-8 encoding is handled natively
10550 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010551 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010552 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10553 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010554 if (ctxt->encoding != NULL)
10555 xmlFree((xmlChar *) ctxt->encoding);
10556 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010557 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010558 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010559 xmlCharEncodingHandlerPtr handler;
10560
10561 if (ctxt->input->encoding != NULL)
10562 xmlFree((xmlChar *) ctxt->input->encoding);
10563 ctxt->input->encoding = encoding;
10564
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010565 handler = xmlFindCharEncodingHandler((const char *) encoding);
10566 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010567 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10568 /* failed to convert */
10569 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10570 return(NULL);
10571 }
Owen Taylor3473f882001-02-23 17:55:21 +000010572 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010573 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010574 "Unsupported encoding %s\n", encoding);
10575 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010576 }
10577 }
10578 }
10579 return(encoding);
10580}
10581
10582/**
10583 * xmlParseSDDecl:
10584 * @ctxt: an XML parser context
10585 *
10586 * parse the XML standalone declaration
10587 *
10588 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010589 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010590 *
10591 * [ VC: Standalone Document Declaration ]
10592 * TODO The standalone document declaration must have the value "no"
10593 * if any external markup declarations contain declarations of:
10594 * - attributes with default values, if elements to which these
10595 * attributes apply appear in the document without specifications
10596 * of values for these attributes, or
10597 * - entities (other than amp, lt, gt, apos, quot), if references
10598 * to those entities appear in the document, or
10599 * - attributes with values subject to normalization, where the
10600 * attribute appears in the document with a value which will change
10601 * as a result of normalization, or
10602 * - element types with element content, if white space occurs directly
10603 * within any instance of those types.
10604 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010605 * Returns:
10606 * 1 if standalone="yes"
10607 * 0 if standalone="no"
10608 * -2 if standalone attribute is missing or invalid
10609 * (A standalone value of -2 means that the XML declaration was found,
10610 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010611 */
10612
10613int
10614xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010615 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010616
10617 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010618 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010619 SKIP(10);
10620 SKIP_BLANKS;
10621 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010622 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010623 return(standalone);
10624 }
10625 NEXT;
10626 SKIP_BLANKS;
10627 if (RAW == '\''){
10628 NEXT;
10629 if ((RAW == 'n') && (NXT(1) == 'o')) {
10630 standalone = 0;
10631 SKIP(2);
10632 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10633 (NXT(2) == 's')) {
10634 standalone = 1;
10635 SKIP(3);
10636 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010637 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010638 }
10639 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010640 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010641 } else
10642 NEXT;
10643 } else if (RAW == '"'){
10644 NEXT;
10645 if ((RAW == 'n') && (NXT(1) == 'o')) {
10646 standalone = 0;
10647 SKIP(2);
10648 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10649 (NXT(2) == 's')) {
10650 standalone = 1;
10651 SKIP(3);
10652 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010653 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010654 }
10655 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010656 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010657 } else
10658 NEXT;
10659 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010660 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010661 }
10662 }
10663 return(standalone);
10664}
10665
10666/**
10667 * xmlParseXMLDecl:
10668 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010669 *
Owen Taylor3473f882001-02-23 17:55:21 +000010670 * parse an XML declaration header
10671 *
10672 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10673 */
10674
10675void
10676xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10677 xmlChar *version;
10678
10679 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010680 * This value for standalone indicates that the document has an
10681 * XML declaration but it does not have a standalone attribute.
10682 * It will be overwritten later if a standalone attribute is found.
10683 */
10684 ctxt->input->standalone = -2;
10685
10686 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010687 * We know that '<?xml' is here.
10688 */
10689 SKIP(5);
10690
William M. Brack76e95df2003-10-18 16:20:14 +000010691 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010692 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10693 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010694 }
10695 SKIP_BLANKS;
10696
10697 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010698 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010699 */
10700 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010701 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010702 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010703 } else {
10704 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10705 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010706 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010707 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010708 if (ctxt->options & XML_PARSE_OLD10) {
10709 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10710 "Unsupported version '%s'\n",
10711 version);
10712 } else {
10713 if ((version[0] == '1') && ((version[1] == '.'))) {
10714 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10715 "Unsupported version '%s'\n",
10716 version, NULL);
10717 } else {
10718 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10719 "Unsupported version '%s'\n",
10720 version);
10721 }
10722 }
Daniel Veillard19840942001-11-29 16:11:38 +000010723 }
10724 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010725 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010726 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010727 }
Owen Taylor3473f882001-02-23 17:55:21 +000010728
10729 /*
10730 * We may have the encoding declaration
10731 */
William M. Brack76e95df2003-10-18 16:20:14 +000010732 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010733 if ((RAW == '?') && (NXT(1) == '>')) {
10734 SKIP(2);
10735 return;
10736 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010737 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010738 }
10739 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010740 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10741 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010742 /*
10743 * The XML REC instructs us to stop parsing right here
10744 */
10745 return;
10746 }
10747
10748 /*
10749 * We may have the standalone status.
10750 */
William M. Brack76e95df2003-10-18 16:20:14 +000010751 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010752 if ((RAW == '?') && (NXT(1) == '>')) {
10753 SKIP(2);
10754 return;
10755 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010756 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010757 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010758
10759 /*
10760 * We can grow the input buffer freely at that point
10761 */
10762 GROW;
10763
Owen Taylor3473f882001-02-23 17:55:21 +000010764 SKIP_BLANKS;
10765 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10766
10767 SKIP_BLANKS;
10768 if ((RAW == '?') && (NXT(1) == '>')) {
10769 SKIP(2);
10770 } else if (RAW == '>') {
10771 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010772 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010773 NEXT;
10774 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010775 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010776 MOVETO_ENDTAG(CUR_PTR);
10777 NEXT;
10778 }
10779}
10780
10781/**
10782 * xmlParseMisc:
10783 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010784 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010785 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010786 *
10787 * [27] Misc ::= Comment | PI | S
10788 */
10789
10790void
10791xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010792 while ((ctxt->instate != XML_PARSER_EOF) &&
10793 (((RAW == '<') && (NXT(1) == '?')) ||
10794 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10795 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010796 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010797 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010798 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010799 NEXT;
10800 } else
10801 xmlParseComment(ctxt);
10802 }
10803}
10804
10805/**
10806 * xmlParseDocument:
10807 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010808 *
Owen Taylor3473f882001-02-23 17:55:21 +000010809 * parse an XML document (and build a tree if using the standard SAX
10810 * interface).
10811 *
10812 * [1] document ::= prolog element Misc*
10813 *
10814 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10815 *
10816 * Returns 0, -1 in case of error. the parser context is augmented
10817 * as a result of the parsing.
10818 */
10819
10820int
10821xmlParseDocument(xmlParserCtxtPtr ctxt) {
10822 xmlChar start[4];
10823 xmlCharEncoding enc;
10824
10825 xmlInitParser();
10826
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010827 if ((ctxt == NULL) || (ctxt->input == NULL))
10828 return(-1);
10829
Owen Taylor3473f882001-02-23 17:55:21 +000010830 GROW;
10831
10832 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010833 * SAX: detecting the level.
10834 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010835 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010836
10837 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010838 * SAX: beginning of the document processing.
10839 */
10840 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10841 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010842 if (ctxt->instate == XML_PARSER_EOF)
10843 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010844
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010845 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010846 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010847 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010848 * Get the 4 first bytes and decode the charset
10849 * if enc != XML_CHAR_ENCODING_NONE
10850 * plug some encoding conversion routines.
10851 */
10852 start[0] = RAW;
10853 start[1] = NXT(1);
10854 start[2] = NXT(2);
10855 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010856 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010857 if (enc != XML_CHAR_ENCODING_NONE) {
10858 xmlSwitchEncoding(ctxt, enc);
10859 }
Owen Taylor3473f882001-02-23 17:55:21 +000010860 }
10861
10862
10863 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010864 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010865 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010866 }
10867
10868 /*
10869 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010870 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010871 * than just the first line, unless the amount of data is really
10872 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010873 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010874 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10875 GROW;
10876 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010877 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010878
10879 /*
10880 * Note that we will switch encoding on the fly.
10881 */
10882 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010883 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10884 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010885 /*
10886 * The XML REC instructs us to stop parsing right here
10887 */
10888 return(-1);
10889 }
10890 ctxt->standalone = ctxt->input->standalone;
10891 SKIP_BLANKS;
10892 } else {
10893 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10894 }
10895 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10896 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010897 if (ctxt->instate == XML_PARSER_EOF)
10898 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010899 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10900 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10901 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10902 }
Owen Taylor3473f882001-02-23 17:55:21 +000010903
10904 /*
10905 * The Misc part of the Prolog
10906 */
10907 GROW;
10908 xmlParseMisc(ctxt);
10909
10910 /*
10911 * Then possibly doc type declaration(s) and more Misc
10912 * (doctypedecl Misc*)?
10913 */
10914 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010915 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010916
10917 ctxt->inSubset = 1;
10918 xmlParseDocTypeDecl(ctxt);
10919 if (RAW == '[') {
10920 ctxt->instate = XML_PARSER_DTD;
10921 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010922 if (ctxt->instate == XML_PARSER_EOF)
10923 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010924 }
10925
10926 /*
10927 * Create and update the external subset.
10928 */
10929 ctxt->inSubset = 2;
10930 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10931 (!ctxt->disableSAX))
10932 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10933 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010934 if (ctxt->instate == XML_PARSER_EOF)
10935 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010936 ctxt->inSubset = 0;
10937
Daniel Veillardac4118d2008-01-11 05:27:32 +000010938 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010939
10940 ctxt->instate = XML_PARSER_PROLOG;
10941 xmlParseMisc(ctxt);
10942 }
10943
10944 /*
10945 * Time to start parsing the tree itself
10946 */
10947 GROW;
10948 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010949 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10950 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010951 } else {
10952 ctxt->instate = XML_PARSER_CONTENT;
10953 xmlParseElement(ctxt);
10954 ctxt->instate = XML_PARSER_EPILOG;
10955
10956
10957 /*
10958 * The Misc part at the end
10959 */
10960 xmlParseMisc(ctxt);
10961
Daniel Veillard561b7f82002-03-20 21:55:57 +000010962 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010963 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010964 }
10965 ctxt->instate = XML_PARSER_EOF;
10966 }
10967
10968 /*
10969 * SAX: end of the document processing.
10970 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010971 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010972 ctxt->sax->endDocument(ctxt->userData);
10973
Daniel Veillard5997aca2002-03-18 18:36:20 +000010974 /*
10975 * Remove locally kept entity definitions if the tree was not built
10976 */
10977 if ((ctxt->myDoc != NULL) &&
10978 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10979 xmlFreeDoc(ctxt->myDoc);
10980 ctxt->myDoc = NULL;
10981 }
10982
Daniel Veillardae0765b2008-07-31 19:54:59 +000010983 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10984 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10985 if (ctxt->valid)
10986 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10987 if (ctxt->nsWellFormed)
10988 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10989 if (ctxt->options & XML_PARSE_OLD10)
10990 ctxt->myDoc->properties |= XML_DOC_OLD10;
10991 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010992 if (! ctxt->wellFormed) {
10993 ctxt->valid = 0;
10994 return(-1);
10995 }
Owen Taylor3473f882001-02-23 17:55:21 +000010996 return(0);
10997}
10998
10999/**
11000 * xmlParseExtParsedEnt:
11001 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011002 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011003 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000011004 * An external general parsed entity is well-formed if it matches the
11005 * production labeled extParsedEnt.
11006 *
11007 * [78] extParsedEnt ::= TextDecl? content
11008 *
11009 * Returns 0, -1 in case of error. the parser context is augmented
11010 * as a result of the parsing.
11011 */
11012
11013int
11014xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11015 xmlChar start[4];
11016 xmlCharEncoding enc;
11017
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011018 if ((ctxt == NULL) || (ctxt->input == NULL))
11019 return(-1);
11020
Owen Taylor3473f882001-02-23 17:55:21 +000011021 xmlDefaultSAXHandlerInit();
11022
Daniel Veillard309f81d2003-09-23 09:02:53 +000011023 xmlDetectSAX2(ctxt);
11024
Owen Taylor3473f882001-02-23 17:55:21 +000011025 GROW;
11026
11027 /*
11028 * SAX: beginning of the document processing.
11029 */
11030 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11031 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11032
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011033 /*
Owen Taylor3473f882001-02-23 17:55:21 +000011034 * Get the 4 first bytes and decode the charset
11035 * if enc != XML_CHAR_ENCODING_NONE
11036 * plug some encoding conversion routines.
11037 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011038 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11039 start[0] = RAW;
11040 start[1] = NXT(1);
11041 start[2] = NXT(2);
11042 start[3] = NXT(3);
11043 enc = xmlDetectCharEncoding(start, 4);
11044 if (enc != XML_CHAR_ENCODING_NONE) {
11045 xmlSwitchEncoding(ctxt, enc);
11046 }
Owen Taylor3473f882001-02-23 17:55:21 +000011047 }
11048
11049
11050 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011051 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011052 }
11053
11054 /*
11055 * Check for the XMLDecl in the Prolog.
11056 */
11057 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000011058 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011059
11060 /*
11061 * Note that we will switch encoding on the fly.
11062 */
11063 xmlParseXMLDecl(ctxt);
11064 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11065 /*
11066 * The XML REC instructs us to stop parsing right here
11067 */
11068 return(-1);
11069 }
11070 SKIP_BLANKS;
11071 } else {
11072 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11073 }
11074 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11075 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011076 if (ctxt->instate == XML_PARSER_EOF)
11077 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000011078
11079 /*
11080 * Doing validity checking on chunk doesn't make sense
11081 */
11082 ctxt->instate = XML_PARSER_CONTENT;
11083 ctxt->validate = 0;
11084 ctxt->loadsubset = 0;
11085 ctxt->depth = 0;
11086
11087 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011088 if (ctxt->instate == XML_PARSER_EOF)
11089 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011090
Owen Taylor3473f882001-02-23 17:55:21 +000011091 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011092 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011093 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011094 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011095 }
11096
11097 /*
11098 * SAX: end of the document processing.
11099 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011100 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011101 ctxt->sax->endDocument(ctxt->userData);
11102
11103 if (! ctxt->wellFormed) return(-1);
11104 return(0);
11105}
11106
Daniel Veillard73b013f2003-09-30 12:36:01 +000011107#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011108/************************************************************************
11109 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011110 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000011111 * *
11112 ************************************************************************/
11113
11114/**
11115 * xmlParseLookupSequence:
11116 * @ctxt: an XML parser context
11117 * @first: the first char to lookup
11118 * @next: the next char to lookup or zero
11119 * @third: the next char to lookup or zero
11120 *
11121 * Try to find if a sequence (first, next, third) or just (first next) or
11122 * (first) is available in the input stream.
11123 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11124 * to avoid rescanning sequences of bytes, it DOES change the state of the
11125 * parser, do not use liberally.
11126 *
11127 * Returns the index to the current parsing point if the full sequence
11128 * is available, -1 otherwise.
11129 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011130static int
Owen Taylor3473f882001-02-23 17:55:21 +000011131xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11132 xmlChar next, xmlChar third) {
11133 int base, len;
11134 xmlParserInputPtr in;
11135 const xmlChar *buf;
11136
11137 in = ctxt->input;
11138 if (in == NULL) return(-1);
11139 base = in->cur - in->base;
11140 if (base < 0) return(-1);
11141 if (ctxt->checkIndex > base)
11142 base = ctxt->checkIndex;
11143 if (in->buf == NULL) {
11144 buf = in->base;
11145 len = in->length;
11146 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011147 buf = xmlBufContent(in->buf->buffer);
11148 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011149 }
11150 /* take into account the sequence length */
11151 if (third) len -= 2;
11152 else if (next) len --;
11153 for (;base < len;base++) {
11154 if (buf[base] == first) {
11155 if (third != 0) {
11156 if ((buf[base + 1] != next) ||
11157 (buf[base + 2] != third)) continue;
11158 } else if (next != 0) {
11159 if (buf[base + 1] != next) continue;
11160 }
11161 ctxt->checkIndex = 0;
11162#ifdef DEBUG_PUSH
11163 if (next == 0)
11164 xmlGenericError(xmlGenericErrorContext,
11165 "PP: lookup '%c' found at %d\n",
11166 first, base);
11167 else if (third == 0)
11168 xmlGenericError(xmlGenericErrorContext,
11169 "PP: lookup '%c%c' found at %d\n",
11170 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011171 else
Owen Taylor3473f882001-02-23 17:55:21 +000011172 xmlGenericError(xmlGenericErrorContext,
11173 "PP: lookup '%c%c%c' found at %d\n",
11174 first, next, third, base);
11175#endif
11176 return(base - (in->cur - in->base));
11177 }
11178 }
11179 ctxt->checkIndex = base;
11180#ifdef DEBUG_PUSH
11181 if (next == 0)
11182 xmlGenericError(xmlGenericErrorContext,
11183 "PP: lookup '%c' failed\n", first);
11184 else if (third == 0)
11185 xmlGenericError(xmlGenericErrorContext,
11186 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011187 else
Owen Taylor3473f882001-02-23 17:55:21 +000011188 xmlGenericError(xmlGenericErrorContext,
11189 "PP: lookup '%c%c%c' failed\n", first, next, third);
11190#endif
11191 return(-1);
11192}
11193
11194/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011195 * xmlParseGetLasts:
11196 * @ctxt: an XML parser context
11197 * @lastlt: pointer to store the last '<' from the input
11198 * @lastgt: pointer to store the last '>' from the input
11199 *
11200 * Lookup the last < and > in the current chunk
11201 */
11202static void
11203xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11204 const xmlChar **lastgt) {
11205 const xmlChar *tmp;
11206
11207 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11208 xmlGenericError(xmlGenericErrorContext,
11209 "Internal error: xmlParseGetLasts\n");
11210 return;
11211 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011212 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011213 tmp = ctxt->input->end;
11214 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011215 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011216 if (tmp < ctxt->input->base) {
11217 *lastlt = NULL;
11218 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011219 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011220 *lastlt = tmp;
11221 tmp++;
11222 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11223 if (*tmp == '\'') {
11224 tmp++;
11225 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11226 if (tmp < ctxt->input->end) tmp++;
11227 } else if (*tmp == '"') {
11228 tmp++;
11229 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11230 if (tmp < ctxt->input->end) tmp++;
11231 } else
11232 tmp++;
11233 }
11234 if (tmp < ctxt->input->end)
11235 *lastgt = tmp;
11236 else {
11237 tmp = *lastlt;
11238 tmp--;
11239 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11240 if (tmp >= ctxt->input->base)
11241 *lastgt = tmp;
11242 else
11243 *lastgt = NULL;
11244 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011245 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011246 } else {
11247 *lastlt = NULL;
11248 *lastgt = NULL;
11249 }
11250}
11251/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011252 * xmlCheckCdataPush:
David Kilzer4f8606c2016-01-05 13:38:09 -080011253 * @utf: pointer to the block of characters
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011254 * @len: length of the block in bytes
David Kilzer4f8606c2016-01-05 13:38:09 -080011255 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011256 *
11257 * Check that the block of characters is okay as SCdata content [20]
11258 *
11259 * Returns the number of bytes to pass if okay, a negative index where an
11260 * UTF-8 error occurred otherwise
11261 */
11262static int
David Kilzer4f8606c2016-01-05 13:38:09 -080011263xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011264 int ix;
11265 unsigned char c;
11266 int codepoint;
11267
11268 if ((utf == NULL) || (len <= 0))
11269 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011270
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011271 for (ix = 0; ix < len;) { /* string is 0-terminated */
11272 c = utf[ix];
11273 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11274 if (c >= 0x20)
11275 ix++;
11276 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11277 ix++;
11278 else
11279 return(-ix);
11280 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011281 if (ix + 2 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011282 if ((utf[ix+1] & 0xc0 ) != 0x80)
11283 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011284 codepoint = (utf[ix] & 0x1f) << 6;
11285 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011286 if (!xmlIsCharQ(codepoint))
11287 return(-ix);
11288 ix += 2;
11289 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011290 if (ix + 3 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011291 if (((utf[ix+1] & 0xc0) != 0x80) ||
11292 ((utf[ix+2] & 0xc0) != 0x80))
11293 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011294 codepoint = (utf[ix] & 0xf) << 12;
11295 codepoint |= (utf[ix+1] & 0x3f) << 6;
11296 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011297 if (!xmlIsCharQ(codepoint))
11298 return(-ix);
11299 ix += 3;
11300 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
David Kilzer4f8606c2016-01-05 13:38:09 -080011301 if (ix + 4 > len) return(complete ? -ix : ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011302 if (((utf[ix+1] & 0xc0) != 0x80) ||
11303 ((utf[ix+2] & 0xc0) != 0x80) ||
11304 ((utf[ix+3] & 0xc0) != 0x80))
11305 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011306 codepoint = (utf[ix] & 0x7) << 18;
11307 codepoint |= (utf[ix+1] & 0x3f) << 12;
11308 codepoint |= (utf[ix+2] & 0x3f) << 6;
11309 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011310 if (!xmlIsCharQ(codepoint))
11311 return(-ix);
11312 ix += 4;
11313 } else /* unknown encoding */
11314 return(-ix);
11315 }
11316 return(ix);
11317}
11318
11319/**
Owen Taylor3473f882001-02-23 17:55:21 +000011320 * xmlParseTryOrFinish:
11321 * @ctxt: an XML parser context
11322 * @terminate: last chunk indicator
11323 *
11324 * Try to progress on parsing
11325 *
11326 * Returns zero if no parsing was possible
11327 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011328static int
Owen Taylor3473f882001-02-23 17:55:21 +000011329xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11330 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011331 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011332 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011333 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011334
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011335 if (ctxt->input == NULL)
11336 return(0);
11337
Owen Taylor3473f882001-02-23 17:55:21 +000011338#ifdef DEBUG_PUSH
11339 switch (ctxt->instate) {
11340 case XML_PARSER_EOF:
11341 xmlGenericError(xmlGenericErrorContext,
11342 "PP: try EOF\n"); break;
11343 case XML_PARSER_START:
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: try START\n"); break;
11346 case XML_PARSER_MISC:
11347 xmlGenericError(xmlGenericErrorContext,
11348 "PP: try MISC\n");break;
11349 case XML_PARSER_COMMENT:
11350 xmlGenericError(xmlGenericErrorContext,
11351 "PP: try COMMENT\n");break;
11352 case XML_PARSER_PROLOG:
11353 xmlGenericError(xmlGenericErrorContext,
11354 "PP: try PROLOG\n");break;
11355 case XML_PARSER_START_TAG:
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: try START_TAG\n");break;
11358 case XML_PARSER_CONTENT:
11359 xmlGenericError(xmlGenericErrorContext,
11360 "PP: try CONTENT\n");break;
11361 case XML_PARSER_CDATA_SECTION:
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: try CDATA_SECTION\n");break;
11364 case XML_PARSER_END_TAG:
11365 xmlGenericError(xmlGenericErrorContext,
11366 "PP: try END_TAG\n");break;
11367 case XML_PARSER_ENTITY_DECL:
11368 xmlGenericError(xmlGenericErrorContext,
11369 "PP: try ENTITY_DECL\n");break;
11370 case XML_PARSER_ENTITY_VALUE:
11371 xmlGenericError(xmlGenericErrorContext,
11372 "PP: try ENTITY_VALUE\n");break;
11373 case XML_PARSER_ATTRIBUTE_VALUE:
11374 xmlGenericError(xmlGenericErrorContext,
11375 "PP: try ATTRIBUTE_VALUE\n");break;
11376 case XML_PARSER_DTD:
11377 xmlGenericError(xmlGenericErrorContext,
11378 "PP: try DTD\n");break;
11379 case XML_PARSER_EPILOG:
11380 xmlGenericError(xmlGenericErrorContext,
11381 "PP: try EPILOG\n");break;
11382 case XML_PARSER_PI:
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: try PI\n");break;
11385 case XML_PARSER_IGNORE:
11386 xmlGenericError(xmlGenericErrorContext,
11387 "PP: try IGNORE\n");break;
11388 }
11389#endif
11390
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011391 if ((ctxt->input != NULL) &&
11392 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011393 xmlSHRINK(ctxt);
11394 ctxt->checkIndex = 0;
11395 }
11396 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011397
Daniel Veillarde50ba812013-04-11 15:54:51 +080011398 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011399 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011400 return(0);
11401
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011402
Owen Taylor3473f882001-02-23 17:55:21 +000011403 /*
11404 * Pop-up of finished entities.
11405 */
11406 while ((RAW == 0) && (ctxt->inputNr > 1))
11407 xmlPopInput(ctxt);
11408
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011409 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011410 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011411 avail = ctxt->input->length -
11412 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011413 else {
11414 /*
11415 * If we are operating on converted input, try to flush
11416 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011417 * buffer. But do not do this in document start where
11418 * encoding="..." may not have been read and we work on a
11419 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011420 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011421 if ((ctxt->instate != XML_PARSER_START) &&
11422 (ctxt->input->buf->raw != NULL) &&
11423 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011424 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11425 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011426 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011427
11428 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011429 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11430 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011431 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011432 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011433 (ctxt->input->cur - ctxt->input->base);
11434 }
Owen Taylor3473f882001-02-23 17:55:21 +000011435 if (avail < 1)
11436 goto done;
11437 switch (ctxt->instate) {
11438 case XML_PARSER_EOF:
11439 /*
11440 * Document parsing is done !
11441 */
11442 goto done;
11443 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011444 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11445 xmlChar start[4];
11446 xmlCharEncoding enc;
11447
11448 /*
11449 * Very first chars read from the document flow.
11450 */
11451 if (avail < 4)
11452 goto done;
11453
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011454 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011455 * Get the 4 first bytes and decode the charset
11456 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011457 * plug some encoding conversion routines,
11458 * else xmlSwitchEncoding will set to (default)
11459 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011460 */
11461 start[0] = RAW;
11462 start[1] = NXT(1);
11463 start[2] = NXT(2);
11464 start[3] = NXT(3);
11465 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011466 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011467 break;
11468 }
Owen Taylor3473f882001-02-23 17:55:21 +000011469
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011470 if (avail < 2)
11471 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011472 cur = ctxt->input->cur[0];
11473 next = ctxt->input->cur[1];
11474 if (cur == 0) {
11475 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11476 ctxt->sax->setDocumentLocator(ctxt->userData,
11477 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011478 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011479 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011480#ifdef DEBUG_PUSH
11481 xmlGenericError(xmlGenericErrorContext,
11482 "PP: entering EOF\n");
11483#endif
11484 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11485 ctxt->sax->endDocument(ctxt->userData);
11486 goto done;
11487 }
11488 if ((cur == '<') && (next == '?')) {
11489 /* PI or XML decl */
11490 if (avail < 5) return(ret);
11491 if ((!terminate) &&
11492 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11493 return(ret);
11494 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11495 ctxt->sax->setDocumentLocator(ctxt->userData,
11496 &xmlDefaultSAXLocator);
11497 if ((ctxt->input->cur[2] == 'x') &&
11498 (ctxt->input->cur[3] == 'm') &&
11499 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011500 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011501 ret += 5;
11502#ifdef DEBUG_PUSH
11503 xmlGenericError(xmlGenericErrorContext,
11504 "PP: Parsing XML Decl\n");
11505#endif
11506 xmlParseXMLDecl(ctxt);
11507 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11508 /*
11509 * The XML REC instructs us to stop parsing right
11510 * here
11511 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011512 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011513 return(0);
11514 }
11515 ctxt->standalone = ctxt->input->standalone;
11516 if ((ctxt->encoding == NULL) &&
11517 (ctxt->input->encoding != NULL))
11518 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11519 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11520 (!ctxt->disableSAX))
11521 ctxt->sax->startDocument(ctxt->userData);
11522 ctxt->instate = XML_PARSER_MISC;
11523#ifdef DEBUG_PUSH
11524 xmlGenericError(xmlGenericErrorContext,
11525 "PP: entering MISC\n");
11526#endif
11527 } else {
11528 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11529 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11530 (!ctxt->disableSAX))
11531 ctxt->sax->startDocument(ctxt->userData);
11532 ctxt->instate = XML_PARSER_MISC;
11533#ifdef DEBUG_PUSH
11534 xmlGenericError(xmlGenericErrorContext,
11535 "PP: entering MISC\n");
11536#endif
11537 }
11538 } else {
11539 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11540 ctxt->sax->setDocumentLocator(ctxt->userData,
11541 &xmlDefaultSAXLocator);
11542 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011543 if (ctxt->version == NULL) {
11544 xmlErrMemory(ctxt, NULL);
11545 break;
11546 }
Owen Taylor3473f882001-02-23 17:55:21 +000011547 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11548 (!ctxt->disableSAX))
11549 ctxt->sax->startDocument(ctxt->userData);
11550 ctxt->instate = XML_PARSER_MISC;
11551#ifdef DEBUG_PUSH
11552 xmlGenericError(xmlGenericErrorContext,
11553 "PP: entering MISC\n");
11554#endif
11555 }
11556 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011557 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011558 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011559 const xmlChar *prefix = NULL;
11560 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011561 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011562
11563 if ((avail < 2) && (ctxt->inputNr == 1))
11564 goto done;
11565 cur = ctxt->input->cur[0];
11566 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011567 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011568 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011569 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11570 ctxt->sax->endDocument(ctxt->userData);
11571 goto done;
11572 }
11573 if (!terminate) {
11574 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011575 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011576 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011577 goto done;
11578 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11579 goto done;
11580 }
11581 }
11582 if (ctxt->spaceNr == 0)
11583 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011584 else if (*ctxt->space == -2)
11585 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011586 else
11587 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011588#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011589 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011590#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011591 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011592#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011593 else
11594 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011595#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011596 if (ctxt->instate == XML_PARSER_EOF)
11597 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011598 if (name == NULL) {
11599 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011600 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011601 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11602 ctxt->sax->endDocument(ctxt->userData);
11603 goto done;
11604 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011605#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011606 /*
11607 * [ VC: Root Element Type ]
11608 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011609 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011610 */
11611 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11612 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11613 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011614#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011615
11616 /*
11617 * Check for an Empty Element.
11618 */
11619 if ((RAW == '/') && (NXT(1) == '>')) {
11620 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011621
11622 if (ctxt->sax2) {
11623 if ((ctxt->sax != NULL) &&
11624 (ctxt->sax->endElementNs != NULL) &&
11625 (!ctxt->disableSAX))
11626 ctxt->sax->endElementNs(ctxt->userData, name,
11627 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011628 if (ctxt->nsNr - nsNr > 0)
11629 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011630#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011631 } else {
11632 if ((ctxt->sax != NULL) &&
11633 (ctxt->sax->endElement != NULL) &&
11634 (!ctxt->disableSAX))
11635 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011636#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011637 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011638 if (ctxt->instate == XML_PARSER_EOF)
11639 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011640 spacePop(ctxt);
11641 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011642 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011643 } else {
11644 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011645 }
Daniel Veillard65686452012-07-19 18:25:01 +080011646 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011647 break;
11648 }
11649 if (RAW == '>') {
11650 NEXT;
11651 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011652 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011653 "Couldn't find end of Start Tag %s\n",
11654 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011655 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011656 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011657 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011658 if (ctxt->sax2)
11659 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011660#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011661 else
11662 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011663#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011664
Daniel Veillarda880b122003-04-21 21:36:41 +000011665 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011666 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011667 break;
11668 }
11669 case XML_PARSER_CONTENT: {
11670 const xmlChar *test;
11671 unsigned int cons;
11672 if ((avail < 2) && (ctxt->inputNr == 1))
11673 goto done;
11674 cur = ctxt->input->cur[0];
11675 next = ctxt->input->cur[1];
11676
11677 test = CUR_PTR;
11678 cons = ctxt->input->consumed;
11679 if ((cur == '<') && (next == '/')) {
11680 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011681 break;
11682 } else if ((cur == '<') && (next == '?')) {
11683 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011684 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11685 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011686 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011687 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011688 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011689 ctxt->instate = XML_PARSER_CONTENT;
11690 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011691 } else if ((cur == '<') && (next != '!')) {
11692 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011693 break;
11694 } else if ((cur == '<') && (next == '!') &&
11695 (ctxt->input->cur[2] == '-') &&
11696 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011697 int term;
11698
11699 if (avail < 4)
11700 goto done;
11701 ctxt->input->cur += 4;
11702 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11703 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011704 if ((!terminate) && (term < 0)) {
11705 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011706 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011707 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011708 xmlParseComment(ctxt);
11709 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011710 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011711 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11712 (ctxt->input->cur[2] == '[') &&
11713 (ctxt->input->cur[3] == 'C') &&
11714 (ctxt->input->cur[4] == 'D') &&
11715 (ctxt->input->cur[5] == 'A') &&
11716 (ctxt->input->cur[6] == 'T') &&
11717 (ctxt->input->cur[7] == 'A') &&
11718 (ctxt->input->cur[8] == '[')) {
11719 SKIP(9);
11720 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011721 break;
11722 } else if ((cur == '<') && (next == '!') &&
11723 (avail < 9)) {
11724 goto done;
11725 } else if (cur == '&') {
11726 if ((!terminate) &&
11727 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11728 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011729 xmlParseReference(ctxt);
11730 } else {
11731 /* TODO Avoid the extra copy, handle directly !!! */
11732 /*
11733 * Goal of the following test is:
11734 * - minimize calls to the SAX 'character' callback
11735 * when they are mergeable
11736 * - handle an problem for isBlank when we only parse
11737 * a sequence of blank chars and the next one is
11738 * not available to check against '<' presence.
11739 * - tries to homogenize the differences in SAX
11740 * callbacks between the push and pull versions
11741 * of the parser.
11742 */
11743 if ((ctxt->inputNr == 1) &&
11744 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11745 if (!terminate) {
11746 if (ctxt->progressive) {
11747 if ((lastlt == NULL) ||
11748 (ctxt->input->cur > lastlt))
11749 goto done;
11750 } else if (xmlParseLookupSequence(ctxt,
11751 '<', 0, 0) < 0) {
11752 goto done;
11753 }
11754 }
11755 }
11756 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011757 xmlParseCharData(ctxt, 0);
11758 }
11759 /*
11760 * Pop-up of finished entities.
11761 */
11762 while ((RAW == 0) && (ctxt->inputNr > 1))
11763 xmlPopInput(ctxt);
11764 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011765 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11766 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011767 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011768 break;
11769 }
11770 break;
11771 }
11772 case XML_PARSER_END_TAG:
11773 if (avail < 2)
11774 goto done;
11775 if (!terminate) {
11776 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011777 /* > can be found unescaped in attribute values */
11778 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011779 goto done;
11780 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11781 goto done;
11782 }
11783 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011784 if (ctxt->sax2) {
11785 xmlParseEndTag2(ctxt,
11786 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11787 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011788 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011789 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011790 }
11791#ifdef LIBXML_SAX1_ENABLED
11792 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011793 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011794#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011795 if (ctxt->instate == XML_PARSER_EOF) {
11796 /* Nothing */
11797 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011798 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011799 } else {
11800 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011801 }
11802 break;
11803 case XML_PARSER_CDATA_SECTION: {
11804 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011805 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011806 * cdataBlock merge back contiguous callbacks.
11807 */
11808 int base;
11809
11810 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11811 if (base < 0) {
11812 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011813 int tmp;
11814
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011815 tmp = xmlCheckCdataPush(ctxt->input->cur,
David Kilzer4f8606c2016-01-05 13:38:09 -080011816 XML_PARSER_BIG_BUFFER_SIZE, 0);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011817 if (tmp < 0) {
11818 tmp = -tmp;
11819 ctxt->input->cur += tmp;
11820 goto encoding_error;
11821 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11823 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011824 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011825 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011826 else if (ctxt->sax->characters != NULL)
11827 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011828 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011829 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011830 if (ctxt->instate == XML_PARSER_EOF)
11831 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011832 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011833 ctxt->checkIndex = 0;
11834 }
11835 goto done;
11836 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011837 int tmp;
11838
David Kilzer4f8606c2016-01-05 13:38:09 -080011839 tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011840 if ((tmp < 0) || (tmp != base)) {
11841 tmp = -tmp;
11842 ctxt->input->cur += tmp;
11843 goto encoding_error;
11844 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011845 if ((ctxt->sax != NULL) && (base == 0) &&
11846 (ctxt->sax->cdataBlock != NULL) &&
11847 (!ctxt->disableSAX)) {
11848 /*
11849 * Special case to provide identical behaviour
11850 * between pull and push parsers on enpty CDATA
11851 * sections
11852 */
11853 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11854 (!strncmp((const char *)&ctxt->input->cur[-9],
11855 "<![CDATA[", 9)))
11856 ctxt->sax->cdataBlock(ctxt->userData,
11857 BAD_CAST "", 0);
11858 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011859 (!ctxt->disableSAX)) {
11860 if (ctxt->sax->cdataBlock != NULL)
11861 ctxt->sax->cdataBlock(ctxt->userData,
11862 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011863 else if (ctxt->sax->characters != NULL)
11864 ctxt->sax->characters(ctxt->userData,
11865 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011866 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011867 if (ctxt->instate == XML_PARSER_EOF)
11868 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011869 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011870 ctxt->checkIndex = 0;
11871 ctxt->instate = XML_PARSER_CONTENT;
11872#ifdef DEBUG_PUSH
11873 xmlGenericError(xmlGenericErrorContext,
11874 "PP: entering CONTENT\n");
11875#endif
11876 }
11877 break;
11878 }
Owen Taylor3473f882001-02-23 17:55:21 +000011879 case XML_PARSER_MISC:
11880 SKIP_BLANKS;
11881 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011882 avail = ctxt->input->length -
11883 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011884 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011885 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011886 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011887 if (avail < 2)
11888 goto done;
11889 cur = ctxt->input->cur[0];
11890 next = ctxt->input->cur[1];
11891 if ((cur == '<') && (next == '?')) {
11892 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011893 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11894 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011895 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011896 }
Owen Taylor3473f882001-02-23 17:55:21 +000011897#ifdef DEBUG_PUSH
11898 xmlGenericError(xmlGenericErrorContext,
11899 "PP: Parsing PI\n");
11900#endif
11901 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011902 if (ctxt->instate == XML_PARSER_EOF)
11903 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011904 ctxt->instate = XML_PARSER_MISC;
11905 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011906 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011907 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011908 (ctxt->input->cur[2] == '-') &&
11909 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011910 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011911 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11912 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011913 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011914 }
Owen Taylor3473f882001-02-23 17:55:21 +000011915#ifdef DEBUG_PUSH
11916 xmlGenericError(xmlGenericErrorContext,
11917 "PP: Parsing Comment\n");
11918#endif
11919 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011920 if (ctxt->instate == XML_PARSER_EOF)
11921 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011922 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011923 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011924 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011925 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011926 (ctxt->input->cur[2] == 'D') &&
11927 (ctxt->input->cur[3] == 'O') &&
11928 (ctxt->input->cur[4] == 'C') &&
11929 (ctxt->input->cur[5] == 'T') &&
11930 (ctxt->input->cur[6] == 'Y') &&
11931 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011932 (ctxt->input->cur[8] == 'E')) {
11933 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011934 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11935 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011936 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011937 }
Owen Taylor3473f882001-02-23 17:55:21 +000011938#ifdef DEBUG_PUSH
11939 xmlGenericError(xmlGenericErrorContext,
11940 "PP: Parsing internal subset\n");
11941#endif
11942 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011943 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011944 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011945 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011946 if (ctxt->instate == XML_PARSER_EOF)
11947 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011948 if (RAW == '[') {
11949 ctxt->instate = XML_PARSER_DTD;
11950#ifdef DEBUG_PUSH
11951 xmlGenericError(xmlGenericErrorContext,
11952 "PP: entering DTD\n");
11953#endif
11954 } else {
11955 /*
11956 * Create and update the external subset.
11957 */
11958 ctxt->inSubset = 2;
11959 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11960 (ctxt->sax->externalSubset != NULL))
11961 ctxt->sax->externalSubset(ctxt->userData,
11962 ctxt->intSubName, ctxt->extSubSystem,
11963 ctxt->extSubURI);
11964 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011965 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011966 ctxt->instate = XML_PARSER_PROLOG;
11967#ifdef DEBUG_PUSH
11968 xmlGenericError(xmlGenericErrorContext,
11969 "PP: entering PROLOG\n");
11970#endif
11971 }
11972 } else if ((cur == '<') && (next == '!') &&
11973 (avail < 9)) {
11974 goto done;
11975 } else {
11976 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011977 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011978 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011979#ifdef DEBUG_PUSH
11980 xmlGenericError(xmlGenericErrorContext,
11981 "PP: entering START_TAG\n");
11982#endif
11983 }
11984 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011985 case XML_PARSER_PROLOG:
11986 SKIP_BLANKS;
11987 if (ctxt->input->buf == NULL)
11988 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11989 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011990 avail = xmlBufUse(ctxt->input->buf->buffer) -
11991 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011992 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011993 goto done;
11994 cur = ctxt->input->cur[0];
11995 next = ctxt->input->cur[1];
11996 if ((cur == '<') && (next == '?')) {
11997 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011998 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11999 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000012000 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012001 }
Owen Taylor3473f882001-02-23 17:55:21 +000012002#ifdef DEBUG_PUSH
12003 xmlGenericError(xmlGenericErrorContext,
12004 "PP: Parsing PI\n");
12005#endif
12006 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012007 if (ctxt->instate == XML_PARSER_EOF)
12008 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012009 ctxt->instate = XML_PARSER_PROLOG;
12010 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012011 } else if ((cur == '<') && (next == '!') &&
12012 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12013 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080012014 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12015 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012016 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080012017 }
Owen Taylor3473f882001-02-23 17:55:21 +000012018#ifdef DEBUG_PUSH
12019 xmlGenericError(xmlGenericErrorContext,
12020 "PP: Parsing Comment\n");
12021#endif
12022 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012023 if (ctxt->instate == XML_PARSER_EOF)
12024 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012025 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080012026 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012027 } else if ((cur == '<') && (next == '!') &&
12028 (avail < 4)) {
12029 goto done;
12030 } else {
12031 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000012032 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080012033 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000012034 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000012035#ifdef DEBUG_PUSH
12036 xmlGenericError(xmlGenericErrorContext,
12037 "PP: entering START_TAG\n");
12038#endif
12039 }
12040 break;
12041 case XML_PARSER_EPILOG:
12042 SKIP_BLANKS;
12043 if (ctxt->input->buf == NULL)
12044 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12045 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012046 avail = xmlBufUse(ctxt->input->buf->buffer) -
12047 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000012048 if (avail < 2)
12049 goto done;
12050 cur = ctxt->input->cur[0];
12051 next = ctxt->input->cur[1];
12052 if ((cur == '<') && (next == '?')) {
12053 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080012054 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12055 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000012056 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012057 }
Owen Taylor3473f882001-02-23 17:55:21 +000012058#ifdef DEBUG_PUSH
12059 xmlGenericError(xmlGenericErrorContext,
12060 "PP: Parsing PI\n");
12061#endif
12062 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012063 if (ctxt->instate == XML_PARSER_EOF)
12064 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012065 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080012066 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012067 } else if ((cur == '<') && (next == '!') &&
12068 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12069 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080012070 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12071 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012072 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080012073 }
Owen Taylor3473f882001-02-23 17:55:21 +000012074#ifdef DEBUG_PUSH
12075 xmlGenericError(xmlGenericErrorContext,
12076 "PP: Parsing Comment\n");
12077#endif
12078 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012079 if (ctxt->instate == XML_PARSER_EOF)
12080 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012081 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080012082 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012083 } else if ((cur == '<') && (next == '!') &&
12084 (avail < 4)) {
12085 goto done;
12086 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012087 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080012088 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012089#ifdef DEBUG_PUSH
12090 xmlGenericError(xmlGenericErrorContext,
12091 "PP: entering EOF\n");
12092#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012093 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012094 ctxt->sax->endDocument(ctxt->userData);
12095 goto done;
12096 }
12097 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012098 case XML_PARSER_DTD: {
12099 /*
12100 * Sorry but progressive parsing of the internal subset
12101 * is not expected to be supported. We first check that
12102 * the full content of the internal subset is available and
12103 * the parsing is launched only at that point.
12104 * Internal subset ends up with "']' S? '>'" in an unescaped
12105 * section and not in a ']]>' sequence which are conditional
12106 * sections (whoever argued to keep that crap in XML deserve
12107 * a place in hell !).
12108 */
12109 int base, i;
12110 xmlChar *buf;
12111 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012112 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000012113
12114 base = ctxt->input->cur - ctxt->input->base;
12115 if (base < 0) return(0);
12116 if (ctxt->checkIndex > base)
12117 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012118 buf = xmlBufContent(ctxt->input->buf->buffer);
12119 use = xmlBufUse(ctxt->input->buf->buffer);
12120 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000012121 if (quote != 0) {
12122 if (buf[base] == quote)
12123 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012124 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000012125 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012126 if ((quote == 0) && (buf[base] == '<')) {
12127 int found = 0;
12128 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012129 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000012130 (buf[base + 1] == '!') &&
12131 (buf[base + 2] == '-') &&
12132 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012133 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000012134 if ((buf[base] == '-') &&
12135 (buf[base + 1] == '-') &&
12136 (buf[base + 2] == '>')) {
12137 found = 1;
12138 base += 2;
12139 break;
12140 }
12141 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012142 if (!found) {
12143#if 0
12144 fprintf(stderr, "unfinished comment\n");
12145#endif
12146 break; /* for */
12147 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012148 continue;
12149 }
12150 }
Owen Taylor3473f882001-02-23 17:55:21 +000012151 if (buf[base] == '"') {
12152 quote = '"';
12153 continue;
12154 }
12155 if (buf[base] == '\'') {
12156 quote = '\'';
12157 continue;
12158 }
12159 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012160#if 0
12161 fprintf(stderr, "%c%c%c%c: ", buf[base],
12162 buf[base + 1], buf[base + 2], buf[base + 3]);
12163#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012164 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012165 break;
12166 if (buf[base + 1] == ']') {
12167 /* conditional crap, skip both ']' ! */
12168 base++;
12169 continue;
12170 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012171 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012172 if (buf[base + i] == '>') {
12173#if 0
12174 fprintf(stderr, "found\n");
12175#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012176 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012177 }
12178 if (!IS_BLANK_CH(buf[base + i])) {
12179#if 0
12180 fprintf(stderr, "not found\n");
12181#endif
12182 goto not_end_of_int_subset;
12183 }
Owen Taylor3473f882001-02-23 17:55:21 +000012184 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012185#if 0
12186 fprintf(stderr, "end of stream\n");
12187#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012188 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012189
Owen Taylor3473f882001-02-23 17:55:21 +000012190 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012191not_end_of_int_subset:
12192 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012193 }
12194 /*
12195 * We didn't found the end of the Internal subset
12196 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012197 if (quote == 0)
12198 ctxt->checkIndex = base;
12199 else
12200 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012201#ifdef DEBUG_PUSH
12202 if (next == 0)
12203 xmlGenericError(xmlGenericErrorContext,
12204 "PP: lookup of int subset end filed\n");
12205#endif
12206 goto done;
12207
12208found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012209 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012210 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012211 if (ctxt->instate == XML_PARSER_EOF)
12212 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012213 ctxt->inSubset = 2;
12214 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12215 (ctxt->sax->externalSubset != NULL))
12216 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12217 ctxt->extSubSystem, ctxt->extSubURI);
12218 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012219 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012220 if (ctxt->instate == XML_PARSER_EOF)
12221 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012222 ctxt->instate = XML_PARSER_PROLOG;
12223 ctxt->checkIndex = 0;
12224#ifdef DEBUG_PUSH
12225 xmlGenericError(xmlGenericErrorContext,
12226 "PP: entering PROLOG\n");
12227#endif
12228 break;
12229 }
12230 case XML_PARSER_COMMENT:
12231 xmlGenericError(xmlGenericErrorContext,
12232 "PP: internal error, state == COMMENT\n");
12233 ctxt->instate = XML_PARSER_CONTENT;
12234#ifdef DEBUG_PUSH
12235 xmlGenericError(xmlGenericErrorContext,
12236 "PP: entering CONTENT\n");
12237#endif
12238 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012239 case XML_PARSER_IGNORE:
12240 xmlGenericError(xmlGenericErrorContext,
12241 "PP: internal error, state == IGNORE");
12242 ctxt->instate = XML_PARSER_DTD;
12243#ifdef DEBUG_PUSH
12244 xmlGenericError(xmlGenericErrorContext,
12245 "PP: entering DTD\n");
12246#endif
12247 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012248 case XML_PARSER_PI:
12249 xmlGenericError(xmlGenericErrorContext,
12250 "PP: internal error, state == PI\n");
12251 ctxt->instate = XML_PARSER_CONTENT;
12252#ifdef DEBUG_PUSH
12253 xmlGenericError(xmlGenericErrorContext,
12254 "PP: entering CONTENT\n");
12255#endif
12256 break;
12257 case XML_PARSER_ENTITY_DECL:
12258 xmlGenericError(xmlGenericErrorContext,
12259 "PP: internal error, state == ENTITY_DECL\n");
12260 ctxt->instate = XML_PARSER_DTD;
12261#ifdef DEBUG_PUSH
12262 xmlGenericError(xmlGenericErrorContext,
12263 "PP: entering DTD\n");
12264#endif
12265 break;
12266 case XML_PARSER_ENTITY_VALUE:
12267 xmlGenericError(xmlGenericErrorContext,
12268 "PP: internal error, state == ENTITY_VALUE\n");
12269 ctxt->instate = XML_PARSER_CONTENT;
12270#ifdef DEBUG_PUSH
12271 xmlGenericError(xmlGenericErrorContext,
12272 "PP: entering DTD\n");
12273#endif
12274 break;
12275 case XML_PARSER_ATTRIBUTE_VALUE:
12276 xmlGenericError(xmlGenericErrorContext,
12277 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12278 ctxt->instate = XML_PARSER_START_TAG;
12279#ifdef DEBUG_PUSH
12280 xmlGenericError(xmlGenericErrorContext,
12281 "PP: entering START_TAG\n");
12282#endif
12283 break;
12284 case XML_PARSER_SYSTEM_LITERAL:
12285 xmlGenericError(xmlGenericErrorContext,
12286 "PP: internal error, state == SYSTEM_LITERAL\n");
12287 ctxt->instate = XML_PARSER_START_TAG;
12288#ifdef DEBUG_PUSH
12289 xmlGenericError(xmlGenericErrorContext,
12290 "PP: entering START_TAG\n");
12291#endif
12292 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012293 case XML_PARSER_PUBLIC_LITERAL:
12294 xmlGenericError(xmlGenericErrorContext,
12295 "PP: internal error, state == PUBLIC_LITERAL\n");
12296 ctxt->instate = XML_PARSER_START_TAG;
12297#ifdef DEBUG_PUSH
12298 xmlGenericError(xmlGenericErrorContext,
12299 "PP: entering START_TAG\n");
12300#endif
12301 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012302 }
12303 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012304done:
Owen Taylor3473f882001-02-23 17:55:21 +000012305#ifdef DEBUG_PUSH
12306 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12307#endif
12308 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012309encoding_error:
12310 {
12311 char buffer[150];
12312
12313 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12314 ctxt->input->cur[0], ctxt->input->cur[1],
12315 ctxt->input->cur[2], ctxt->input->cur[3]);
12316 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12317 "Input is not proper UTF-8, indicate encoding !\n%s",
12318 BAD_CAST buffer, NULL);
12319 }
12320 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012321}
12322
12323/**
Daniel Veillard65686452012-07-19 18:25:01 +080012324 * xmlParseCheckTransition:
12325 * @ctxt: an XML parser context
12326 * @chunk: a char array
12327 * @size: the size in byte of the chunk
12328 *
12329 * Check depending on the current parser state if the chunk given must be
12330 * processed immediately or one need more data to advance on parsing.
12331 *
12332 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12333 */
12334static int
12335xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12336 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12337 return(-1);
12338 if (ctxt->instate == XML_PARSER_START_TAG) {
12339 if (memchr(chunk, '>', size) != NULL)
12340 return(1);
12341 return(0);
12342 }
12343 if (ctxt->progressive == XML_PARSER_COMMENT) {
12344 if (memchr(chunk, '>', size) != NULL)
12345 return(1);
12346 return(0);
12347 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012348 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12349 if (memchr(chunk, '>', size) != NULL)
12350 return(1);
12351 return(0);
12352 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012353 if (ctxt->progressive == XML_PARSER_PI) {
12354 if (memchr(chunk, '>', size) != NULL)
12355 return(1);
12356 return(0);
12357 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012358 if (ctxt->instate == XML_PARSER_END_TAG) {
12359 if (memchr(chunk, '>', size) != NULL)
12360 return(1);
12361 return(0);
12362 }
12363 if ((ctxt->progressive == XML_PARSER_DTD) ||
12364 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012365 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012366 return(1);
12367 return(0);
12368 }
Daniel Veillard65686452012-07-19 18:25:01 +080012369 return(1);
12370}
12371
12372/**
Owen Taylor3473f882001-02-23 17:55:21 +000012373 * xmlParseChunk:
12374 * @ctxt: an XML parser context
12375 * @chunk: an char array
12376 * @size: the size in byte of the chunk
12377 * @terminate: last chunk indicator
12378 *
12379 * Parse a Chunk of memory
12380 *
12381 * Returns zero if no error, the xmlParserErrors otherwise.
12382 */
12383int
12384xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12385 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012386 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012387 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012388 size_t old_avail = 0;
12389 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012390
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012391 if (ctxt == NULL)
12392 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012393 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012394 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012395 if (ctxt->instate == XML_PARSER_EOF)
12396 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012397 if (ctxt->instate == XML_PARSER_START)
12398 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012399 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12400 (chunk[size - 1] == '\r')) {
12401 end_in_lf = 1;
12402 size--;
12403 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012404
12405xmldecl_done:
12406
Owen Taylor3473f882001-02-23 17:55:21 +000012407 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12408 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012409 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12410 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012411 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012412
Daniel Veillard65686452012-07-19 18:25:01 +080012413 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012414 /*
12415 * Specific handling if we autodetected an encoding, we should not
12416 * push more than the first line ... which depend on the encoding
12417 * And only push the rest once the final encoding was detected
12418 */
12419 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12420 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012421 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012422
12423 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12424 BAD_CAST "UTF-16")) ||
12425 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12426 BAD_CAST "UTF16")))
12427 len = 90;
12428 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12429 BAD_CAST "UCS-4")) ||
12430 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12431 BAD_CAST "UCS4")))
12432 len = 180;
12433
12434 if (ctxt->input->buf->rawconsumed < len)
12435 len -= ctxt->input->buf->rawconsumed;
12436
Raul Hudeaba9716a2010-03-15 10:13:29 +010012437 /*
12438 * Change size for reading the initial declaration only
12439 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12440 * will blindly copy extra bytes from memory.
12441 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012442 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012443 remain = size - len;
12444 size = len;
12445 } else {
12446 remain = 0;
12447 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012448 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012449 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012450 if (res < 0) {
12451 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012452 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012453 return (XML_PARSER_EOF);
12454 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012455 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012456#ifdef DEBUG_PUSH
12457 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12458#endif
12459
Owen Taylor3473f882001-02-23 17:55:21 +000012460 } else if (ctxt->instate != XML_PARSER_EOF) {
12461 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12462 xmlParserInputBufferPtr in = ctxt->input->buf;
12463 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12464 (in->raw != NULL)) {
12465 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012466 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12467 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012468
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012469 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012470 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012471 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012472 xmlGenericError(xmlGenericErrorContext,
12473 "xmlParseChunk: encoder error\n");
12474 return(XML_ERR_INVALID_ENCODING);
12475 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012476 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012477 }
12478 }
12479 }
Daniel Veillard65686452012-07-19 18:25:01 +080012480 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012481 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012482 } else {
12483 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12484 avail = xmlBufUse(ctxt->input->buf->buffer);
12485 /*
12486 * Depending on the current state it may not be such
12487 * a good idea to try parsing if there is nothing in the chunk
12488 * which would be worth doing a parser state transition and we
12489 * need to wait for more data
12490 */
12491 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12492 (old_avail == 0) || (avail == 0) ||
12493 (xmlParseCheckTransition(ctxt,
12494 (const char *)&ctxt->input->base[old_avail],
12495 avail - old_avail)))
12496 xmlParseTryOrFinish(ctxt, terminate);
12497 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012498 if (ctxt->instate == XML_PARSER_EOF)
12499 return(ctxt->errNo);
12500
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012501 if ((ctxt->input != NULL) &&
12502 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12503 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12504 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12505 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012506 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012507 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012508 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12509 return(ctxt->errNo);
12510
12511 if (remain != 0) {
12512 chunk += size;
12513 size = remain;
12514 remain = 0;
12515 goto xmldecl_done;
12516 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012517 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12518 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012519 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12520 ctxt->input);
12521 size_t current = ctxt->input->cur - ctxt->input->base;
12522
Daniel Veillarda617e242006-01-09 14:38:44 +000012523 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012524
12525 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12526 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012527 }
Owen Taylor3473f882001-02-23 17:55:21 +000012528 if (terminate) {
12529 /*
12530 * Check for termination
12531 */
Daniel Veillard65686452012-07-19 18:25:01 +080012532 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012533
12534 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012535 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012536 cur_avail = ctxt->input->length -
12537 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012538 else
Daniel Veillard65686452012-07-19 18:25:01 +080012539 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12540 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012541 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012542
Owen Taylor3473f882001-02-23 17:55:21 +000012543 if ((ctxt->instate != XML_PARSER_EOF) &&
12544 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012545 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012546 }
Daniel Veillard65686452012-07-19 18:25:01 +080012547 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012548 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012549 }
Owen Taylor3473f882001-02-23 17:55:21 +000012550 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012551 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012552 ctxt->sax->endDocument(ctxt->userData);
12553 }
12554 ctxt->instate = XML_PARSER_EOF;
12555 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012556 if (ctxt->wellFormed == 0)
12557 return((xmlParserErrors) ctxt->errNo);
12558 else
12559 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012560}
12561
12562/************************************************************************
12563 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012564 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012565 * *
12566 ************************************************************************/
12567
12568/**
Owen Taylor3473f882001-02-23 17:55:21 +000012569 * xmlCreatePushParserCtxt:
12570 * @sax: a SAX handler
12571 * @user_data: The user data returned on SAX callbacks
12572 * @chunk: a pointer to an array of chars
12573 * @size: number of chars in the array
12574 * @filename: an optional file name or URI
12575 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012576 * Create a parser context for using the XML parser in push mode.
12577 * If @buffer and @size are non-NULL, the data is used to detect
12578 * the encoding. The remaining characters will be parsed so they
12579 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012580 * To allow content encoding detection, @size should be >= 4
12581 * The value of @filename is used for fetching external entities
12582 * and error/warning reports.
12583 *
12584 * Returns the new parser context or NULL
12585 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012586
Owen Taylor3473f882001-02-23 17:55:21 +000012587xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012588xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012589 const char *chunk, int size, const char *filename) {
12590 xmlParserCtxtPtr ctxt;
12591 xmlParserInputPtr inputStream;
12592 xmlParserInputBufferPtr buf;
12593 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12594
12595 /*
12596 * plug some encoding conversion routines
12597 */
12598 if ((chunk != NULL) && (size >= 4))
12599 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12600
12601 buf = xmlAllocParserInputBuffer(enc);
12602 if (buf == NULL) return(NULL);
12603
12604 ctxt = xmlNewParserCtxt();
12605 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012606 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012607 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012608 return(NULL);
12609 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012610 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012611 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12612 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012613 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012614 xmlFreeParserInputBuffer(buf);
12615 xmlFreeParserCtxt(ctxt);
12616 return(NULL);
12617 }
Owen Taylor3473f882001-02-23 17:55:21 +000012618 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012619#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012620 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012621#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012622 xmlFree(ctxt->sax);
12623 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12624 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012625 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012626 xmlFreeParserInputBuffer(buf);
12627 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012628 return(NULL);
12629 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012630 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12631 if (sax->initialized == XML_SAX2_MAGIC)
12632 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12633 else
12634 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012635 if (user_data != NULL)
12636 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012637 }
Owen Taylor3473f882001-02-23 17:55:21 +000012638 if (filename == NULL) {
12639 ctxt->directory = NULL;
12640 } else {
12641 ctxt->directory = xmlParserGetDirectory(filename);
12642 }
12643
12644 inputStream = xmlNewInputStream(ctxt);
12645 if (inputStream == NULL) {
12646 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012647 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012648 return(NULL);
12649 }
12650
12651 if (filename == NULL)
12652 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012653 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012654 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012655 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012656 if (inputStream->filename == NULL) {
12657 xmlFreeParserCtxt(ctxt);
12658 xmlFreeParserInputBuffer(buf);
12659 return(NULL);
12660 }
12661 }
Owen Taylor3473f882001-02-23 17:55:21 +000012662 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012663 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012664 inputPush(ctxt, inputStream);
12665
William M. Brack3a1cd212005-02-11 14:35:54 +000012666 /*
12667 * If the caller didn't provide an initial 'chunk' for determining
12668 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12669 * that it can be automatically determined later
12670 */
12671 if ((size == 0) || (chunk == NULL)) {
12672 ctxt->charset = XML_CHAR_ENCODING_NONE;
12673 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012674 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12675 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012676
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012677 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012678
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012679 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012680#ifdef DEBUG_PUSH
12681 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12682#endif
12683 }
12684
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012685 if (enc != XML_CHAR_ENCODING_NONE) {
12686 xmlSwitchEncoding(ctxt, enc);
12687 }
12688
Owen Taylor3473f882001-02-23 17:55:21 +000012689 return(ctxt);
12690}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012691#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012692
12693/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012694 * xmlHaltParser:
12695 * @ctxt: an XML parser context
12696 *
12697 * Blocks further parser processing don't override error
12698 * for internal use
12699 */
12700static void
12701xmlHaltParser(xmlParserCtxtPtr ctxt) {
12702 if (ctxt == NULL)
12703 return;
12704 ctxt->instate = XML_PARSER_EOF;
12705 ctxt->disableSAX = 1;
12706 if (ctxt->input != NULL) {
12707 /*
12708 * in case there was a specific allocation deallocate before
12709 * overriding base
12710 */
12711 if (ctxt->input->free != NULL) {
12712 ctxt->input->free((xmlChar *) ctxt->input->base);
12713 ctxt->input->free = NULL;
12714 }
12715 ctxt->input->cur = BAD_CAST"";
12716 ctxt->input->base = ctxt->input->cur;
12717 }
12718}
12719
12720/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012721 * xmlStopParser:
12722 * @ctxt: an XML parser context
12723 *
12724 * Blocks further parser processing
12725 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012726void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012727xmlStopParser(xmlParserCtxtPtr ctxt) {
12728 if (ctxt == NULL)
12729 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012730 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012731 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012732}
12733
12734/**
Owen Taylor3473f882001-02-23 17:55:21 +000012735 * xmlCreateIOParserCtxt:
12736 * @sax: a SAX handler
12737 * @user_data: The user data returned on SAX callbacks
12738 * @ioread: an I/O read function
12739 * @ioclose: an I/O close function
12740 * @ioctx: an I/O handler
12741 * @enc: the charset encoding if known
12742 *
12743 * Create a parser context for using the XML parser with an existing
12744 * I/O stream
12745 *
12746 * Returns the new parser context or NULL
12747 */
12748xmlParserCtxtPtr
12749xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12750 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12751 void *ioctx, xmlCharEncoding enc) {
12752 xmlParserCtxtPtr ctxt;
12753 xmlParserInputPtr inputStream;
12754 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012755
Daniel Veillard42595322004-11-08 10:52:06 +000012756 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012757
12758 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012759 if (buf == NULL) {
12760 if (ioclose != NULL)
12761 ioclose(ioctx);
12762 return (NULL);
12763 }
Owen Taylor3473f882001-02-23 17:55:21 +000012764
12765 ctxt = xmlNewParserCtxt();
12766 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012767 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012768 return(NULL);
12769 }
12770 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012771#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012772 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012773#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012774 xmlFree(ctxt->sax);
12775 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12776 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012777 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012778 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012779 return(NULL);
12780 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012781 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12782 if (sax->initialized == XML_SAX2_MAGIC)
12783 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12784 else
12785 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012786 if (user_data != NULL)
12787 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012788 }
Owen Taylor3473f882001-02-23 17:55:21 +000012789
12790 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12791 if (inputStream == NULL) {
12792 xmlFreeParserCtxt(ctxt);
12793 return(NULL);
12794 }
12795 inputPush(ctxt, inputStream);
12796
12797 return(ctxt);
12798}
12799
Daniel Veillard4432df22003-09-28 18:58:27 +000012800#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012801/************************************************************************
12802 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012803 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012804 * *
12805 ************************************************************************/
12806
12807/**
12808 * xmlIOParseDTD:
12809 * @sax: the SAX handler block or NULL
12810 * @input: an Input Buffer
12811 * @enc: the charset encoding if known
12812 *
12813 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012814 *
Owen Taylor3473f882001-02-23 17:55:21 +000012815 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012816 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012817 */
12818
12819xmlDtdPtr
12820xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12821 xmlCharEncoding enc) {
12822 xmlDtdPtr ret = NULL;
12823 xmlParserCtxtPtr ctxt;
12824 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012825 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012826
12827 if (input == NULL)
12828 return(NULL);
12829
12830 ctxt = xmlNewParserCtxt();
12831 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012832 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012833 return(NULL);
12834 }
12835
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012836 /* We are loading a DTD */
12837 ctxt->options |= XML_PARSE_DTDLOAD;
12838
Owen Taylor3473f882001-02-23 17:55:21 +000012839 /*
12840 * Set-up the SAX context
12841 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012842 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012843 if (ctxt->sax != NULL)
12844 xmlFree(ctxt->sax);
12845 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012846 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012847 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012848 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012849
12850 /*
12851 * generate a parser input from the I/O handler
12852 */
12853
Daniel Veillard43caefb2003-12-07 19:32:22 +000012854 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012855 if (pinput == NULL) {
12856 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012857 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012858 xmlFreeParserCtxt(ctxt);
12859 return(NULL);
12860 }
12861
12862 /*
12863 * plug some encoding conversion routines here.
12864 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012865 if (xmlPushInput(ctxt, pinput) < 0) {
12866 if (sax != NULL) ctxt->sax = NULL;
12867 xmlFreeParserCtxt(ctxt);
12868 return(NULL);
12869 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012870 if (enc != XML_CHAR_ENCODING_NONE) {
12871 xmlSwitchEncoding(ctxt, enc);
12872 }
Owen Taylor3473f882001-02-23 17:55:21 +000012873
12874 pinput->filename = NULL;
12875 pinput->line = 1;
12876 pinput->col = 1;
12877 pinput->base = ctxt->input->cur;
12878 pinput->cur = ctxt->input->cur;
12879 pinput->free = NULL;
12880
12881 /*
12882 * let's parse that entity knowing it's an external subset.
12883 */
12884 ctxt->inSubset = 2;
12885 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012886 if (ctxt->myDoc == NULL) {
12887 xmlErrMemory(ctxt, "New Doc failed");
12888 return(NULL);
12889 }
12890 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012891 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12892 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012893
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012894 if ((enc == XML_CHAR_ENCODING_NONE) &&
12895 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012896 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012897 * Get the 4 first bytes and decode the charset
12898 * if enc != XML_CHAR_ENCODING_NONE
12899 * plug some encoding conversion routines.
12900 */
12901 start[0] = RAW;
12902 start[1] = NXT(1);
12903 start[2] = NXT(2);
12904 start[3] = NXT(3);
12905 enc = xmlDetectCharEncoding(start, 4);
12906 if (enc != XML_CHAR_ENCODING_NONE) {
12907 xmlSwitchEncoding(ctxt, enc);
12908 }
12909 }
12910
Owen Taylor3473f882001-02-23 17:55:21 +000012911 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12912
12913 if (ctxt->myDoc != NULL) {
12914 if (ctxt->wellFormed) {
12915 ret = ctxt->myDoc->extSubset;
12916 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012917 if (ret != NULL) {
12918 xmlNodePtr tmp;
12919
12920 ret->doc = NULL;
12921 tmp = ret->children;
12922 while (tmp != NULL) {
12923 tmp->doc = NULL;
12924 tmp = tmp->next;
12925 }
12926 }
Owen Taylor3473f882001-02-23 17:55:21 +000012927 } else {
12928 ret = NULL;
12929 }
12930 xmlFreeDoc(ctxt->myDoc);
12931 ctxt->myDoc = NULL;
12932 }
12933 if (sax != NULL) ctxt->sax = NULL;
12934 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012935
Owen Taylor3473f882001-02-23 17:55:21 +000012936 return(ret);
12937}
12938
12939/**
12940 * xmlSAXParseDTD:
12941 * @sax: the SAX handler block
12942 * @ExternalID: a NAME* containing the External ID of the DTD
12943 * @SystemID: a NAME* containing the URL to the DTD
12944 *
12945 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012946 *
Owen Taylor3473f882001-02-23 17:55:21 +000012947 * Returns the resulting xmlDtdPtr or NULL in case of error.
12948 */
12949
12950xmlDtdPtr
12951xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12952 const xmlChar *SystemID) {
12953 xmlDtdPtr ret = NULL;
12954 xmlParserCtxtPtr ctxt;
12955 xmlParserInputPtr input = NULL;
12956 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012957 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012958
12959 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12960
12961 ctxt = xmlNewParserCtxt();
12962 if (ctxt == NULL) {
12963 return(NULL);
12964 }
12965
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012966 /* We are loading a DTD */
12967 ctxt->options |= XML_PARSE_DTDLOAD;
12968
Owen Taylor3473f882001-02-23 17:55:21 +000012969 /*
12970 * Set-up the SAX context
12971 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012972 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012973 if (ctxt->sax != NULL)
12974 xmlFree(ctxt->sax);
12975 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012976 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012977 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012978
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012979 /*
12980 * Canonicalise the system ID
12981 */
12982 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012983 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012984 xmlFreeParserCtxt(ctxt);
12985 return(NULL);
12986 }
Owen Taylor3473f882001-02-23 17:55:21 +000012987
12988 /*
12989 * Ask the Entity resolver to load the damn thing
12990 */
12991
12992 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012993 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12994 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012995 if (input == NULL) {
12996 if (sax != NULL) ctxt->sax = NULL;
12997 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012998 if (systemIdCanonic != NULL)
12999 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000013000 return(NULL);
13001 }
13002
13003 /*
13004 * plug some encoding conversion routines here.
13005 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000013006 if (xmlPushInput(ctxt, input) < 0) {
13007 if (sax != NULL) ctxt->sax = NULL;
13008 xmlFreeParserCtxt(ctxt);
13009 if (systemIdCanonic != NULL)
13010 xmlFree(systemIdCanonic);
13011 return(NULL);
13012 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013013 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13014 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
13015 xmlSwitchEncoding(ctxt, enc);
13016 }
Owen Taylor3473f882001-02-23 17:55:21 +000013017
13018 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000013019 input->filename = (char *) systemIdCanonic;
13020 else
13021 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000013022 input->line = 1;
13023 input->col = 1;
13024 input->base = ctxt->input->cur;
13025 input->cur = ctxt->input->cur;
13026 input->free = NULL;
13027
13028 /*
13029 * let's parse that entity knowing it's an external subset.
13030 */
13031 ctxt->inSubset = 2;
13032 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000013033 if (ctxt->myDoc == NULL) {
13034 xmlErrMemory(ctxt, "New Doc failed");
13035 if (sax != NULL) ctxt->sax = NULL;
13036 xmlFreeParserCtxt(ctxt);
13037 return(NULL);
13038 }
13039 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000013040 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
13041 ExternalID, SystemID);
13042 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13043
13044 if (ctxt->myDoc != NULL) {
13045 if (ctxt->wellFormed) {
13046 ret = ctxt->myDoc->extSubset;
13047 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000013048 if (ret != NULL) {
13049 xmlNodePtr tmp;
13050
13051 ret->doc = NULL;
13052 tmp = ret->children;
13053 while (tmp != NULL) {
13054 tmp->doc = NULL;
13055 tmp = tmp->next;
13056 }
13057 }
Owen Taylor3473f882001-02-23 17:55:21 +000013058 } else {
13059 ret = NULL;
13060 }
13061 xmlFreeDoc(ctxt->myDoc);
13062 ctxt->myDoc = NULL;
13063 }
13064 if (sax != NULL) ctxt->sax = NULL;
13065 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013066
Owen Taylor3473f882001-02-23 17:55:21 +000013067 return(ret);
13068}
13069
Daniel Veillard4432df22003-09-28 18:58:27 +000013070
Owen Taylor3473f882001-02-23 17:55:21 +000013071/**
13072 * xmlParseDTD:
13073 * @ExternalID: a NAME* containing the External ID of the DTD
13074 * @SystemID: a NAME* containing the URL to the DTD
13075 *
13076 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000013077 *
Owen Taylor3473f882001-02-23 17:55:21 +000013078 * Returns the resulting xmlDtdPtr or NULL in case of error.
13079 */
13080
13081xmlDtdPtr
13082xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13083 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13084}
Daniel Veillard4432df22003-09-28 18:58:27 +000013085#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013086
13087/************************************************************************
13088 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013089 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000013090 * *
13091 ************************************************************************/
13092
13093/**
Owen Taylor3473f882001-02-23 17:55:21 +000013094 * xmlParseCtxtExternalEntity:
13095 * @ctx: the existing parsing context
13096 * @URL: the URL for the entity to load
13097 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013098 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013099 *
13100 * Parse an external general entity within an existing parsing context
13101 * An external general parsed entity is well-formed if it matches the
13102 * production labeled extParsedEnt.
13103 *
13104 * [78] extParsedEnt ::= TextDecl? content
13105 *
13106 * Returns 0 if the entity is well formed, -1 in case of args problem and
13107 * the parser error code otherwise
13108 */
13109
13110int
13111xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013112 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000013113 xmlParserCtxtPtr ctxt;
13114 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013115 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013116 xmlSAXHandlerPtr oldsax = NULL;
13117 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013118 xmlChar start[4];
13119 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013120
Daniel Veillardce682bc2004-11-05 17:22:25 +000013121 if (ctx == NULL) return(-1);
13122
Daniel Veillard0161e632008-08-28 15:36:32 +000013123 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13124 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013125 return(XML_ERR_ENTITY_LOOP);
13126 }
13127
Daniel Veillardcda96922001-08-21 10:56:31 +000013128 if (lst != NULL)
13129 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013130 if ((URL == NULL) && (ID == NULL))
13131 return(-1);
13132 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13133 return(-1);
13134
Rob Richards798743a2009-06-19 13:54:25 -040013135 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000013136 if (ctxt == NULL) {
13137 return(-1);
13138 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013139
Owen Taylor3473f882001-02-23 17:55:21 +000013140 oldsax = ctxt->sax;
13141 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013142 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013143 newDoc = xmlNewDoc(BAD_CAST "1.0");
13144 if (newDoc == NULL) {
13145 xmlFreeParserCtxt(ctxt);
13146 return(-1);
13147 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013148 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013149 if (ctx->myDoc->dict) {
13150 newDoc->dict = ctx->myDoc->dict;
13151 xmlDictReference(newDoc->dict);
13152 }
Owen Taylor3473f882001-02-23 17:55:21 +000013153 if (ctx->myDoc != NULL) {
13154 newDoc->intSubset = ctx->myDoc->intSubset;
13155 newDoc->extSubset = ctx->myDoc->extSubset;
13156 }
13157 if (ctx->myDoc->URL != NULL) {
13158 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13159 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013160 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13161 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013162 ctxt->sax = oldsax;
13163 xmlFreeParserCtxt(ctxt);
13164 newDoc->intSubset = NULL;
13165 newDoc->extSubset = NULL;
13166 xmlFreeDoc(newDoc);
13167 return(-1);
13168 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013169 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013170 nodePush(ctxt, newDoc->children);
13171 if (ctx->myDoc == NULL) {
13172 ctxt->myDoc = newDoc;
13173 } else {
13174 ctxt->myDoc = ctx->myDoc;
13175 newDoc->children->doc = ctx->myDoc;
13176 }
13177
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013178 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013179 * Get the 4 first bytes and decode the charset
13180 * if enc != XML_CHAR_ENCODING_NONE
13181 * plug some encoding conversion routines.
13182 */
13183 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013184 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13185 start[0] = RAW;
13186 start[1] = NXT(1);
13187 start[2] = NXT(2);
13188 start[3] = NXT(3);
13189 enc = xmlDetectCharEncoding(start, 4);
13190 if (enc != XML_CHAR_ENCODING_NONE) {
13191 xmlSwitchEncoding(ctxt, enc);
13192 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013193 }
13194
Owen Taylor3473f882001-02-23 17:55:21 +000013195 /*
13196 * Parse a possible text declaration first
13197 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013198 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013199 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013200 /*
13201 * An XML-1.0 document can't reference an entity not XML-1.0
13202 */
13203 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13204 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013205 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013206 "Version mismatch between document and entity\n");
13207 }
Owen Taylor3473f882001-02-23 17:55:21 +000013208 }
13209
13210 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013211 * If the user provided its own SAX callbacks then reuse the
13212 * useData callback field, otherwise the expected setup in a
13213 * DOM builder is to have userData == ctxt
13214 */
13215 if (ctx->userData == ctx)
13216 ctxt->userData = ctxt;
13217 else
13218 ctxt->userData = ctx->userData;
13219
13220 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013221 * Doing validity checking on chunk doesn't make sense
13222 */
13223 ctxt->instate = XML_PARSER_CONTENT;
13224 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013225 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013226 ctxt->loadsubset = ctx->loadsubset;
13227 ctxt->depth = ctx->depth + 1;
13228 ctxt->replaceEntities = ctx->replaceEntities;
13229 if (ctxt->validate) {
13230 ctxt->vctxt.error = ctx->vctxt.error;
13231 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013232 } else {
13233 ctxt->vctxt.error = NULL;
13234 ctxt->vctxt.warning = NULL;
13235 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013236 ctxt->vctxt.nodeTab = NULL;
13237 ctxt->vctxt.nodeNr = 0;
13238 ctxt->vctxt.nodeMax = 0;
13239 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013240 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13241 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013242 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13243 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13244 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013245 ctxt->dictNames = ctx->dictNames;
13246 ctxt->attsDefault = ctx->attsDefault;
13247 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013248 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013249
13250 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013251
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013252 ctx->validate = ctxt->validate;
13253 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013254 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013255 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013256 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013257 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013258 }
13259 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013260 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013261 }
13262
13263 if (!ctxt->wellFormed) {
13264 if (ctxt->errNo == 0)
13265 ret = 1;
13266 else
13267 ret = ctxt->errNo;
13268 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013269 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013270 xmlNodePtr cur;
13271
13272 /*
13273 * Return the newly created nodeset after unlinking it from
13274 * they pseudo parent.
13275 */
13276 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013277 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013278 while (cur != NULL) {
13279 cur->parent = NULL;
13280 cur = cur->next;
13281 }
13282 newDoc->children->children = NULL;
13283 }
13284 ret = 0;
13285 }
13286 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013287 ctxt->dict = NULL;
13288 ctxt->attsDefault = NULL;
13289 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013290 xmlFreeParserCtxt(ctxt);
13291 newDoc->intSubset = NULL;
13292 newDoc->extSubset = NULL;
13293 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013294
Owen Taylor3473f882001-02-23 17:55:21 +000013295 return(ret);
13296}
13297
13298/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013299 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013300 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013301 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013302 * @sax: the SAX handler bloc (possibly NULL)
13303 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13304 * @depth: Used for loop detection, use 0
13305 * @URL: the URL for the entity to load
13306 * @ID: the System ID for the entity to load
13307 * @list: the return value for the set of parsed nodes
13308 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013309 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013310 *
13311 * Returns 0 if the entity is well formed, -1 in case of args problem and
13312 * the parser error code otherwise
13313 */
13314
Daniel Veillard7d515752003-09-26 19:12:37 +000013315static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013316xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13317 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013318 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013319 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013320 xmlParserCtxtPtr ctxt;
13321 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013322 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013323 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013324 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013325 xmlChar start[4];
13326 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013327
Daniel Veillard0161e632008-08-28 15:36:32 +000013328 if (((depth > 40) &&
13329 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13330 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013331 return(XML_ERR_ENTITY_LOOP);
13332 }
13333
Owen Taylor3473f882001-02-23 17:55:21 +000013334 if (list != NULL)
13335 *list = NULL;
13336 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013337 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013338 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013339 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013340
13341
Rob Richards9c0aa472009-03-26 18:10:19 +000013342 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013343 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013344 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013345 if (oldctxt != NULL) {
13346 ctxt->_private = oldctxt->_private;
13347 ctxt->loadsubset = oldctxt->loadsubset;
13348 ctxt->validate = oldctxt->validate;
13349 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013350 ctxt->record_info = oldctxt->record_info;
13351 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13352 ctxt->node_seq.length = oldctxt->node_seq.length;
13353 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013354 } else {
13355 /*
13356 * Doing validity checking on chunk without context
13357 * doesn't make sense
13358 */
13359 ctxt->_private = NULL;
13360 ctxt->validate = 0;
13361 ctxt->external = 2;
13362 ctxt->loadsubset = 0;
13363 }
Owen Taylor3473f882001-02-23 17:55:21 +000013364 if (sax != NULL) {
13365 oldsax = ctxt->sax;
13366 ctxt->sax = sax;
13367 if (user_data != NULL)
13368 ctxt->userData = user_data;
13369 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013370 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013371 newDoc = xmlNewDoc(BAD_CAST "1.0");
13372 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013373 ctxt->node_seq.maximum = 0;
13374 ctxt->node_seq.length = 0;
13375 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013376 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013377 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013378 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013379 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013380 newDoc->intSubset = doc->intSubset;
13381 newDoc->extSubset = doc->extSubset;
13382 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013383 xmlDictReference(newDoc->dict);
13384
Owen Taylor3473f882001-02-23 17:55:21 +000013385 if (doc->URL != NULL) {
13386 newDoc->URL = xmlStrdup(doc->URL);
13387 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013388 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13389 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013390 if (sax != NULL)
13391 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013392 ctxt->node_seq.maximum = 0;
13393 ctxt->node_seq.length = 0;
13394 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013395 xmlFreeParserCtxt(ctxt);
13396 newDoc->intSubset = NULL;
13397 newDoc->extSubset = NULL;
13398 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013399 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013400 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013401 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013402 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013403 ctxt->myDoc = doc;
13404 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013405
Daniel Veillard0161e632008-08-28 15:36:32 +000013406 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013407 * Get the 4 first bytes and decode the charset
13408 * if enc != XML_CHAR_ENCODING_NONE
13409 * plug some encoding conversion routines.
13410 */
13411 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013412 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13413 start[0] = RAW;
13414 start[1] = NXT(1);
13415 start[2] = NXT(2);
13416 start[3] = NXT(3);
13417 enc = xmlDetectCharEncoding(start, 4);
13418 if (enc != XML_CHAR_ENCODING_NONE) {
13419 xmlSwitchEncoding(ctxt, enc);
13420 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013421 }
13422
Owen Taylor3473f882001-02-23 17:55:21 +000013423 /*
13424 * Parse a possible text declaration first
13425 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013426 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013427 xmlParseTextDecl(ctxt);
13428 }
13429
Owen Taylor3473f882001-02-23 17:55:21 +000013430 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013431 ctxt->depth = depth;
13432
13433 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013434
Daniel Veillard561b7f82002-03-20 21:55:57 +000013435 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013436 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013437 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013438 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013439 }
13440 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013441 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013442 }
13443
13444 if (!ctxt->wellFormed) {
13445 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013446 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013447 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013448 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013449 } else {
13450 if (list != NULL) {
13451 xmlNodePtr cur;
13452
13453 /*
13454 * Return the newly created nodeset after unlinking it from
13455 * they pseudo parent.
13456 */
13457 cur = newDoc->children->children;
13458 *list = cur;
13459 while (cur != NULL) {
13460 cur->parent = NULL;
13461 cur = cur->next;
13462 }
13463 newDoc->children->children = NULL;
13464 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013465 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013466 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013467
13468 /*
13469 * Record in the parent context the number of entities replacement
13470 * done when parsing that reference.
13471 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013472 if (oldctxt != NULL)
13473 oldctxt->nbentities += ctxt->nbentities;
13474
Daniel Veillard0161e632008-08-28 15:36:32 +000013475 /*
13476 * Also record the size of the entity parsed
13477 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013478 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013479 oldctxt->sizeentities += ctxt->input->consumed;
13480 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13481 }
13482 /*
13483 * And record the last error if any
13484 */
13485 if (ctxt->lastError.code != XML_ERR_OK)
13486 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13487
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013488 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013489 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013490 if (oldctxt != NULL) {
13491 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13492 oldctxt->node_seq.length = ctxt->node_seq.length;
13493 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13494 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013495 ctxt->node_seq.maximum = 0;
13496 ctxt->node_seq.length = 0;
13497 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013498 xmlFreeParserCtxt(ctxt);
13499 newDoc->intSubset = NULL;
13500 newDoc->extSubset = NULL;
13501 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013502
Owen Taylor3473f882001-02-23 17:55:21 +000013503 return(ret);
13504}
13505
Daniel Veillard81273902003-09-30 00:43:48 +000013506#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013507/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013508 * xmlParseExternalEntity:
13509 * @doc: the document the chunk pertains to
13510 * @sax: the SAX handler bloc (possibly NULL)
13511 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13512 * @depth: Used for loop detection, use 0
13513 * @URL: the URL for the entity to load
13514 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013515 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013516 *
13517 * Parse an external general entity
13518 * An external general parsed entity is well-formed if it matches the
13519 * production labeled extParsedEnt.
13520 *
13521 * [78] extParsedEnt ::= TextDecl? content
13522 *
13523 * Returns 0 if the entity is well formed, -1 in case of args problem and
13524 * the parser error code otherwise
13525 */
13526
13527int
13528xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013529 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013530 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013531 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013532}
13533
13534/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013535 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013536 * @doc: the document the chunk pertains to
13537 * @sax: the SAX handler bloc (possibly NULL)
13538 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13539 * @depth: Used for loop detection, use 0
13540 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013541 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013542 *
13543 * Parse a well-balanced chunk of an XML document
13544 * called by the parser
13545 * The allowed sequence for the Well Balanced Chunk is the one defined by
13546 * the content production in the XML grammar:
13547 *
13548 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13549 *
13550 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13551 * the parser error code otherwise
13552 */
13553
13554int
13555xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013556 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013557 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13558 depth, string, lst, 0 );
13559}
Daniel Veillard81273902003-09-30 00:43:48 +000013560#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013561
13562/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013563 * xmlParseBalancedChunkMemoryInternal:
13564 * @oldctxt: the existing parsing context
13565 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13566 * @user_data: the user data field for the parser context
13567 * @lst: the return value for the set of parsed nodes
13568 *
13569 *
13570 * Parse a well-balanced chunk of an XML document
13571 * called by the parser
13572 * The allowed sequence for the Well Balanced Chunk is the one defined by
13573 * the content production in the XML grammar:
13574 *
13575 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13576 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013577 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13578 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013579 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013580 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013581 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013582 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013583static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013584xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13585 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13586 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013587 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013588 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013589 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013590 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013591 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013592 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013593 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013594#ifdef SAX2
13595 int i;
13596#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013597
Daniel Veillard0161e632008-08-28 15:36:32 +000013598 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13599 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013600 return(XML_ERR_ENTITY_LOOP);
13601 }
13602
13603
13604 if (lst != NULL)
13605 *lst = NULL;
13606 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013607 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013608
13609 size = xmlStrlen(string);
13610
13611 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013612 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013613 if (user_data != NULL)
13614 ctxt->userData = user_data;
13615 else
13616 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013617 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13618 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013619 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13620 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13621 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013622
Daniel Veillard74eaec12009-08-26 15:57:20 +020013623#ifdef SAX2
13624 /* propagate namespaces down the entity */
13625 for (i = 0;i < oldctxt->nsNr;i += 2) {
13626 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13627 }
13628#endif
13629
Daniel Veillard328f48c2002-11-15 15:24:34 +000013630 oldsax = ctxt->sax;
13631 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013632 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013633 ctxt->replaceEntities = oldctxt->replaceEntities;
13634 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013635
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013636 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013637 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013638 newDoc = xmlNewDoc(BAD_CAST "1.0");
13639 if (newDoc == NULL) {
13640 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013641 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013642 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013643 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013644 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013645 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013646 newDoc->dict = ctxt->dict;
13647 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013648 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013649 } else {
13650 ctxt->myDoc = oldctxt->myDoc;
13651 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013652 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013653 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013654 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13655 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013656 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013657 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013658 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013659 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013660 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013661 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013662 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013663 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013664 ctxt->myDoc->children = NULL;
13665 ctxt->myDoc->last = NULL;
13666 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013667 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013668 ctxt->instate = XML_PARSER_CONTENT;
13669 ctxt->depth = oldctxt->depth + 1;
13670
Daniel Veillard328f48c2002-11-15 15:24:34 +000013671 ctxt->validate = 0;
13672 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013673 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13674 /*
13675 * ID/IDREF registration will be done in xmlValidateElement below
13676 */
13677 ctxt->loadsubset |= XML_SKIP_IDS;
13678 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013679 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013680 ctxt->attsDefault = oldctxt->attsDefault;
13681 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013682
Daniel Veillard68e9e742002-11-16 15:35:11 +000013683 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013684 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013685 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013686 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013687 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013688 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013689 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013690 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013691 }
13692
13693 if (!ctxt->wellFormed) {
13694 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013695 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013696 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013697 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013698 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013699 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013700 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013701
William M. Brack7b9154b2003-09-27 19:23:50 +000013702 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013703 xmlNodePtr cur;
13704
13705 /*
13706 * Return the newly created nodeset after unlinking it from
13707 * they pseudo parent.
13708 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013709 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013710 *lst = cur;
13711 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013712#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013713 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13714 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13715 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013716 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13717 oldctxt->myDoc, cur);
13718 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013719#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013720 cur->parent = NULL;
13721 cur = cur->next;
13722 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013723 ctxt->myDoc->children->children = NULL;
13724 }
13725 if (ctxt->myDoc != NULL) {
13726 xmlFreeNode(ctxt->myDoc->children);
13727 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013728 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013729 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013730
13731 /*
13732 * Record in the parent context the number of entities replacement
13733 * done when parsing that reference.
13734 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013735 if (oldctxt != NULL)
13736 oldctxt->nbentities += ctxt->nbentities;
13737
Daniel Veillard0161e632008-08-28 15:36:32 +000013738 /*
13739 * Also record the last error if any
13740 */
13741 if (ctxt->lastError.code != XML_ERR_OK)
13742 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13743
Daniel Veillard328f48c2002-11-15 15:24:34 +000013744 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013745 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013746 ctxt->attsDefault = NULL;
13747 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013748 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013749 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013750 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013751 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013752
Daniel Veillard328f48c2002-11-15 15:24:34 +000013753 return(ret);
13754}
13755
Daniel Veillard29b17482004-08-16 00:39:03 +000013756/**
13757 * xmlParseInNodeContext:
13758 * @node: the context node
13759 * @data: the input string
13760 * @datalen: the input string length in bytes
13761 * @options: a combination of xmlParserOption
13762 * @lst: the return value for the set of parsed nodes
13763 *
13764 * Parse a well-balanced chunk of an XML document
13765 * within the context (DTD, namespaces, etc ...) of the given node.
13766 *
13767 * The allowed sequence for the data is a Well Balanced Chunk defined by
13768 * the content production in the XML grammar:
13769 *
13770 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13771 *
13772 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13773 * error code otherwise
13774 */
13775xmlParserErrors
13776xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13777 int options, xmlNodePtr *lst) {
13778#ifdef SAX2
13779 xmlParserCtxtPtr ctxt;
13780 xmlDocPtr doc = NULL;
13781 xmlNodePtr fake, cur;
13782 int nsnr = 0;
13783
13784 xmlParserErrors ret = XML_ERR_OK;
13785
13786 /*
13787 * check all input parameters, grab the document
13788 */
13789 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13790 return(XML_ERR_INTERNAL_ERROR);
13791 switch (node->type) {
13792 case XML_ELEMENT_NODE:
13793 case XML_ATTRIBUTE_NODE:
13794 case XML_TEXT_NODE:
13795 case XML_CDATA_SECTION_NODE:
13796 case XML_ENTITY_REF_NODE:
13797 case XML_PI_NODE:
13798 case XML_COMMENT_NODE:
13799 case XML_DOCUMENT_NODE:
13800 case XML_HTML_DOCUMENT_NODE:
13801 break;
13802 default:
13803 return(XML_ERR_INTERNAL_ERROR);
13804
13805 }
13806 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13807 (node->type != XML_DOCUMENT_NODE) &&
13808 (node->type != XML_HTML_DOCUMENT_NODE))
13809 node = node->parent;
13810 if (node == NULL)
13811 return(XML_ERR_INTERNAL_ERROR);
13812 if (node->type == XML_ELEMENT_NODE)
13813 doc = node->doc;
13814 else
13815 doc = (xmlDocPtr) node;
13816 if (doc == NULL)
13817 return(XML_ERR_INTERNAL_ERROR);
13818
13819 /*
13820 * allocate a context and set-up everything not related to the
13821 * node position in the tree
13822 */
13823 if (doc->type == XML_DOCUMENT_NODE)
13824 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13825#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013826 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013827 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013828 /*
13829 * When parsing in context, it makes no sense to add implied
13830 * elements like html/body/etc...
13831 */
13832 options |= HTML_PARSE_NOIMPLIED;
13833 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013834#endif
13835 else
13836 return(XML_ERR_INTERNAL_ERROR);
13837
13838 if (ctxt == NULL)
13839 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013840
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013841 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013842 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13843 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13844 * we must wait until the last moment to free the original one.
13845 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013846 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013847 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013848 xmlDictFree(ctxt->dict);
13849 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013850 } else
13851 options |= XML_PARSE_NODICT;
13852
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013853 if (doc->encoding != NULL) {
13854 xmlCharEncodingHandlerPtr hdlr;
13855
13856 if (ctxt->encoding != NULL)
13857 xmlFree((xmlChar *) ctxt->encoding);
13858 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13859
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013860 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013861 if (hdlr != NULL) {
13862 xmlSwitchToEncoding(ctxt, hdlr);
13863 } else {
13864 return(XML_ERR_UNSUPPORTED_ENCODING);
13865 }
13866 }
13867
Daniel Veillard37334572008-07-31 08:20:02 +000013868 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013869 xmlDetectSAX2(ctxt);
13870 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013871 /* parsing in context, i.e. as within existing content */
13872 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013873
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013874 fake = xmlNewComment(NULL);
13875 if (fake == NULL) {
13876 xmlFreeParserCtxt(ctxt);
13877 return(XML_ERR_NO_MEMORY);
13878 }
13879 xmlAddChild(node, fake);
13880
Daniel Veillard29b17482004-08-16 00:39:03 +000013881 if (node->type == XML_ELEMENT_NODE) {
13882 nodePush(ctxt, node);
13883 /*
13884 * initialize the SAX2 namespaces stack
13885 */
13886 cur = node;
13887 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13888 xmlNsPtr ns = cur->nsDef;
13889 const xmlChar *iprefix, *ihref;
13890
13891 while (ns != NULL) {
13892 if (ctxt->dict) {
13893 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13894 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13895 } else {
13896 iprefix = ns->prefix;
13897 ihref = ns->href;
13898 }
13899
13900 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13901 nsPush(ctxt, iprefix, ihref);
13902 nsnr++;
13903 }
13904 ns = ns->next;
13905 }
13906 cur = cur->parent;
13907 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013908 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013909
13910 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13911 /*
13912 * ID/IDREF registration will be done in xmlValidateElement below
13913 */
13914 ctxt->loadsubset |= XML_SKIP_IDS;
13915 }
13916
Daniel Veillard499cc922006-01-18 17:22:35 +000013917#ifdef LIBXML_HTML_ENABLED
13918 if (doc->type == XML_HTML_DOCUMENT_NODE)
13919 __htmlParseContent(ctxt);
13920 else
13921#endif
13922 xmlParseContent(ctxt);
13923
Daniel Veillard29b17482004-08-16 00:39:03 +000013924 nsPop(ctxt, nsnr);
13925 if ((RAW == '<') && (NXT(1) == '/')) {
13926 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13927 } else if (RAW != 0) {
13928 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13929 }
13930 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13931 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13932 ctxt->wellFormed = 0;
13933 }
13934
13935 if (!ctxt->wellFormed) {
13936 if (ctxt->errNo == 0)
13937 ret = XML_ERR_INTERNAL_ERROR;
13938 else
13939 ret = (xmlParserErrors)ctxt->errNo;
13940 } else {
13941 ret = XML_ERR_OK;
13942 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013943
Daniel Veillard29b17482004-08-16 00:39:03 +000013944 /*
13945 * Return the newly created nodeset after unlinking it from
13946 * the pseudo sibling.
13947 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013948
Daniel Veillard29b17482004-08-16 00:39:03 +000013949 cur = fake->next;
13950 fake->next = NULL;
13951 node->last = fake;
13952
13953 if (cur != NULL) {
13954 cur->prev = NULL;
13955 }
13956
13957 *lst = cur;
13958
13959 while (cur != NULL) {
13960 cur->parent = NULL;
13961 cur = cur->next;
13962 }
13963
13964 xmlUnlinkNode(fake);
13965 xmlFreeNode(fake);
13966
13967
13968 if (ret != XML_ERR_OK) {
13969 xmlFreeNodeList(*lst);
13970 *lst = NULL;
13971 }
William M. Brackc3f81342004-10-03 01:22:44 +000013972
William M. Brackb7b54de2004-10-06 16:38:01 +000013973 if (doc->dict != NULL)
13974 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013975 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013976
Daniel Veillard29b17482004-08-16 00:39:03 +000013977 return(ret);
13978#else /* !SAX2 */
13979 return(XML_ERR_INTERNAL_ERROR);
13980#endif
13981}
13982
Daniel Veillard81273902003-09-30 00:43:48 +000013983#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013984/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013985 * xmlParseBalancedChunkMemoryRecover:
13986 * @doc: the document the chunk pertains to
13987 * @sax: the SAX handler bloc (possibly NULL)
13988 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13989 * @depth: Used for loop detection, use 0
13990 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13991 * @lst: the return value for the set of parsed nodes
13992 * @recover: return nodes even if the data is broken (use 0)
13993 *
13994 *
13995 * Parse a well-balanced chunk of an XML document
13996 * called by the parser
13997 * The allowed sequence for the Well Balanced Chunk is the one defined by
13998 * the content production in the XML grammar:
13999 *
14000 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
14001 *
14002 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
14003 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000014004 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000014005 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000014006 * the parsed chunk is not well balanced, assuming the parsing succeeded to
14007 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000014008 */
14009int
14010xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000014011 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000014012 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000014013 xmlParserCtxtPtr ctxt;
14014 xmlDocPtr newDoc;
14015 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014016 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000014017 int size;
14018 int ret = 0;
14019
Daniel Veillard0161e632008-08-28 15:36:32 +000014020 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000014021 return(XML_ERR_ENTITY_LOOP);
14022 }
14023
14024
Daniel Veillardcda96922001-08-21 10:56:31 +000014025 if (lst != NULL)
14026 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014027 if (string == NULL)
14028 return(-1);
14029
14030 size = xmlStrlen(string);
14031
14032 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
14033 if (ctxt == NULL) return(-1);
14034 ctxt->userData = ctxt;
14035 if (sax != NULL) {
14036 oldsax = ctxt->sax;
14037 ctxt->sax = sax;
14038 if (user_data != NULL)
14039 ctxt->userData = user_data;
14040 }
14041 newDoc = xmlNewDoc(BAD_CAST "1.0");
14042 if (newDoc == NULL) {
14043 xmlFreeParserCtxt(ctxt);
14044 return(-1);
14045 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000014046 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014047 if ((doc != NULL) && (doc->dict != NULL)) {
14048 xmlDictFree(ctxt->dict);
14049 ctxt->dict = doc->dict;
14050 xmlDictReference(ctxt->dict);
14051 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14052 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14053 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14054 ctxt->dictNames = 1;
14055 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000014056 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014057 }
Owen Taylor3473f882001-02-23 17:55:21 +000014058 if (doc != NULL) {
14059 newDoc->intSubset = doc->intSubset;
14060 newDoc->extSubset = doc->extSubset;
14061 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014062 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14063 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000014064 if (sax != NULL)
14065 ctxt->sax = oldsax;
14066 xmlFreeParserCtxt(ctxt);
14067 newDoc->intSubset = NULL;
14068 newDoc->extSubset = NULL;
14069 xmlFreeDoc(newDoc);
14070 return(-1);
14071 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014072 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14073 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000014074 if (doc == NULL) {
14075 ctxt->myDoc = newDoc;
14076 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000014077 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000014078 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000014079 /* Ensure that doc has XML spec namespace */
14080 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14081 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000014082 }
14083 ctxt->instate = XML_PARSER_CONTENT;
14084 ctxt->depth = depth;
14085
14086 /*
14087 * Doing validity checking on chunk doesn't make sense
14088 */
14089 ctxt->validate = 0;
14090 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014091 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014092
Daniel Veillardb39bc392002-10-26 19:29:51 +000014093 if ( doc != NULL ){
14094 content = doc->children;
14095 doc->children = NULL;
14096 xmlParseContent(ctxt);
14097 doc->children = content;
14098 }
14099 else {
14100 xmlParseContent(ctxt);
14101 }
Owen Taylor3473f882001-02-23 17:55:21 +000014102 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014103 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014104 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014105 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014106 }
14107 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014108 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014109 }
14110
14111 if (!ctxt->wellFormed) {
14112 if (ctxt->errNo == 0)
14113 ret = 1;
14114 else
14115 ret = ctxt->errNo;
14116 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000014117 ret = 0;
14118 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014119
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014120 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14121 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000014122
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014123 /*
14124 * Return the newly created nodeset after unlinking it from
14125 * they pseudo parent.
14126 */
14127 cur = newDoc->children->children;
14128 *lst = cur;
14129 while (cur != NULL) {
14130 xmlSetTreeDoc(cur, doc);
14131 cur->parent = NULL;
14132 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000014133 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014134 newDoc->children->children = NULL;
14135 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014136
14137 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000014138 ctxt->sax = oldsax;
14139 xmlFreeParserCtxt(ctxt);
14140 newDoc->intSubset = NULL;
14141 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000014142 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014143 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000014144
Owen Taylor3473f882001-02-23 17:55:21 +000014145 return(ret);
14146}
14147
14148/**
14149 * xmlSAXParseEntity:
14150 * @sax: the SAX handler block
14151 * @filename: the filename
14152 *
14153 * parse an XML external entity out of context and build a tree.
14154 * It use the given SAX function block to handle the parsing callback.
14155 * If sax is NULL, fallback to the default DOM tree building routines.
14156 *
14157 * [78] extParsedEnt ::= TextDecl? content
14158 *
14159 * This correspond to a "Well Balanced" chunk
14160 *
14161 * Returns the resulting document tree
14162 */
14163
14164xmlDocPtr
14165xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14166 xmlDocPtr ret;
14167 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014168
14169 ctxt = xmlCreateFileParserCtxt(filename);
14170 if (ctxt == NULL) {
14171 return(NULL);
14172 }
14173 if (sax != NULL) {
14174 if (ctxt->sax != NULL)
14175 xmlFree(ctxt->sax);
14176 ctxt->sax = sax;
14177 ctxt->userData = NULL;
14178 }
14179
Owen Taylor3473f882001-02-23 17:55:21 +000014180 xmlParseExtParsedEnt(ctxt);
14181
14182 if (ctxt->wellFormed)
14183 ret = ctxt->myDoc;
14184 else {
14185 ret = NULL;
14186 xmlFreeDoc(ctxt->myDoc);
14187 ctxt->myDoc = NULL;
14188 }
14189 if (sax != NULL)
14190 ctxt->sax = NULL;
14191 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000014192
Owen Taylor3473f882001-02-23 17:55:21 +000014193 return(ret);
14194}
14195
14196/**
14197 * xmlParseEntity:
14198 * @filename: the filename
14199 *
14200 * parse an XML external entity out of context and build a tree.
14201 *
14202 * [78] extParsedEnt ::= TextDecl? content
14203 *
14204 * This correspond to a "Well Balanced" chunk
14205 *
14206 * Returns the resulting document tree
14207 */
14208
14209xmlDocPtr
14210xmlParseEntity(const char *filename) {
14211 return(xmlSAXParseEntity(NULL, filename));
14212}
Daniel Veillard81273902003-09-30 00:43:48 +000014213#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014214
14215/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014216 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014217 * @URL: the entity URL
14218 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014219 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014220 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014221 *
14222 * Create a parser context for an external entity
14223 * Automatic support for ZLIB/Compress compressed document is provided
14224 * by default if found at compile-time.
14225 *
14226 * Returns the new parser context or NULL
14227 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014228static xmlParserCtxtPtr
14229xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14230 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014231 xmlParserCtxtPtr ctxt;
14232 xmlParserInputPtr inputStream;
14233 char *directory = NULL;
14234 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014235
Owen Taylor3473f882001-02-23 17:55:21 +000014236 ctxt = xmlNewParserCtxt();
14237 if (ctxt == NULL) {
14238 return(NULL);
14239 }
14240
Daniel Veillard48247b42009-07-10 16:12:46 +020014241 if (pctx != NULL) {
14242 ctxt->options = pctx->options;
14243 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014244 }
14245
Owen Taylor3473f882001-02-23 17:55:21 +000014246 uri = xmlBuildURI(URL, base);
14247
14248 if (uri == NULL) {
14249 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14250 if (inputStream == NULL) {
14251 xmlFreeParserCtxt(ctxt);
14252 return(NULL);
14253 }
14254
14255 inputPush(ctxt, inputStream);
14256
14257 if ((ctxt->directory == NULL) && (directory == NULL))
14258 directory = xmlParserGetDirectory((char *)URL);
14259 if ((ctxt->directory == NULL) && (directory != NULL))
14260 ctxt->directory = directory;
14261 } else {
14262 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14263 if (inputStream == NULL) {
14264 xmlFree(uri);
14265 xmlFreeParserCtxt(ctxt);
14266 return(NULL);
14267 }
14268
14269 inputPush(ctxt, inputStream);
14270
14271 if ((ctxt->directory == NULL) && (directory == NULL))
14272 directory = xmlParserGetDirectory((char *)uri);
14273 if ((ctxt->directory == NULL) && (directory != NULL))
14274 ctxt->directory = directory;
14275 xmlFree(uri);
14276 }
Owen Taylor3473f882001-02-23 17:55:21 +000014277 return(ctxt);
14278}
14279
Rob Richards9c0aa472009-03-26 18:10:19 +000014280/**
14281 * xmlCreateEntityParserCtxt:
14282 * @URL: the entity URL
14283 * @ID: the entity PUBLIC ID
14284 * @base: a possible base for the target URI
14285 *
14286 * Create a parser context for an external entity
14287 * Automatic support for ZLIB/Compress compressed document is provided
14288 * by default if found at compile-time.
14289 *
14290 * Returns the new parser context or NULL
14291 */
14292xmlParserCtxtPtr
14293xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14294 const xmlChar *base) {
14295 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14296
14297}
14298
Owen Taylor3473f882001-02-23 17:55:21 +000014299/************************************************************************
14300 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014301 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014302 * *
14303 ************************************************************************/
14304
14305/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014306 * xmlCreateURLParserCtxt:
14307 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014308 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014309 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014310 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014311 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014312 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014313 *
14314 * Returns the new parser context or NULL
14315 */
14316xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014317xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014318{
14319 xmlParserCtxtPtr ctxt;
14320 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014321 char *directory = NULL;
14322
Owen Taylor3473f882001-02-23 17:55:21 +000014323 ctxt = xmlNewParserCtxt();
14324 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014325 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014326 return(NULL);
14327 }
14328
Daniel Veillarddf292f72005-01-16 19:00:15 +000014329 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014330 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014331 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014332
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014333 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014334 if (inputStream == NULL) {
14335 xmlFreeParserCtxt(ctxt);
14336 return(NULL);
14337 }
14338
Owen Taylor3473f882001-02-23 17:55:21 +000014339 inputPush(ctxt, inputStream);
14340 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014341 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014342 if ((ctxt->directory == NULL) && (directory != NULL))
14343 ctxt->directory = directory;
14344
14345 return(ctxt);
14346}
14347
Daniel Veillard61b93382003-11-03 14:28:31 +000014348/**
14349 * xmlCreateFileParserCtxt:
14350 * @filename: the filename
14351 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014352 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014353 * Automatic support for ZLIB/Compress compressed document is provided
14354 * by default if found at compile-time.
14355 *
14356 * Returns the new parser context or NULL
14357 */
14358xmlParserCtxtPtr
14359xmlCreateFileParserCtxt(const char *filename)
14360{
14361 return(xmlCreateURLParserCtxt(filename, 0));
14362}
14363
Daniel Veillard81273902003-09-30 00:43:48 +000014364#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014365/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014366 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014367 * @sax: the SAX handler block
14368 * @filename: the filename
14369 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14370 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014371 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014372 *
14373 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14374 * compressed document is provided by default if found at compile-time.
14375 * It use the given SAX function block to handle the parsing callback.
14376 * If sax is NULL, fallback to the default DOM tree building routines.
14377 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014378 * User data (void *) is stored within the parser context in the
14379 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014380 *
Owen Taylor3473f882001-02-23 17:55:21 +000014381 * Returns the resulting document tree
14382 */
14383
14384xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014385xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14386 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014387 xmlDocPtr ret;
14388 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014389
Daniel Veillard635ef722001-10-29 11:48:19 +000014390 xmlInitParser();
14391
Owen Taylor3473f882001-02-23 17:55:21 +000014392 ctxt = xmlCreateFileParserCtxt(filename);
14393 if (ctxt == NULL) {
14394 return(NULL);
14395 }
14396 if (sax != NULL) {
14397 if (ctxt->sax != NULL)
14398 xmlFree(ctxt->sax);
14399 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014400 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014401 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014402 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014403 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014404 }
Owen Taylor3473f882001-02-23 17:55:21 +000014405
Daniel Veillard37d2d162008-03-14 10:54:00 +000014406 if (ctxt->directory == NULL)
14407 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014408
Daniel Veillarddad3f682002-11-17 16:47:27 +000014409 ctxt->recovery = recovery;
14410
Owen Taylor3473f882001-02-23 17:55:21 +000014411 xmlParseDocument(ctxt);
14412
William M. Brackc07329e2003-09-08 01:57:30 +000014413 if ((ctxt->wellFormed) || recovery) {
14414 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014415 if (ret != NULL) {
14416 if (ctxt->input->buf->compressed > 0)
14417 ret->compression = 9;
14418 else
14419 ret->compression = ctxt->input->buf->compressed;
14420 }
William M. Brackc07329e2003-09-08 01:57:30 +000014421 }
Owen Taylor3473f882001-02-23 17:55:21 +000014422 else {
14423 ret = NULL;
14424 xmlFreeDoc(ctxt->myDoc);
14425 ctxt->myDoc = NULL;
14426 }
14427 if (sax != NULL)
14428 ctxt->sax = NULL;
14429 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014430
Owen Taylor3473f882001-02-23 17:55:21 +000014431 return(ret);
14432}
14433
14434/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014435 * xmlSAXParseFile:
14436 * @sax: the SAX handler block
14437 * @filename: the filename
14438 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14439 * documents
14440 *
14441 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14442 * compressed document is provided by default if found at compile-time.
14443 * It use the given SAX function block to handle the parsing callback.
14444 * If sax is NULL, fallback to the default DOM tree building routines.
14445 *
14446 * Returns the resulting document tree
14447 */
14448
14449xmlDocPtr
14450xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14451 int recovery) {
14452 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14453}
14454
14455/**
Owen Taylor3473f882001-02-23 17:55:21 +000014456 * xmlRecoverDoc:
14457 * @cur: a pointer to an array of xmlChar
14458 *
14459 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014460 * In the case the document is not Well Formed, a attempt to build a
14461 * tree is tried anyway
14462 *
14463 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014464 */
14465
14466xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014467xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014468 return(xmlSAXParseDoc(NULL, cur, 1));
14469}
14470
14471/**
14472 * xmlParseFile:
14473 * @filename: the filename
14474 *
14475 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14476 * compressed document is provided by default if found at compile-time.
14477 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014478 * Returns the resulting document tree if the file was wellformed,
14479 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014480 */
14481
14482xmlDocPtr
14483xmlParseFile(const char *filename) {
14484 return(xmlSAXParseFile(NULL, filename, 0));
14485}
14486
14487/**
14488 * xmlRecoverFile:
14489 * @filename: the filename
14490 *
14491 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14492 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014493 * In the case the document is not Well Formed, it attempts to build
14494 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014495 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014496 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014497 */
14498
14499xmlDocPtr
14500xmlRecoverFile(const char *filename) {
14501 return(xmlSAXParseFile(NULL, filename, 1));
14502}
14503
14504
14505/**
14506 * xmlSetupParserForBuffer:
14507 * @ctxt: an XML parser context
14508 * @buffer: a xmlChar * buffer
14509 * @filename: a file name
14510 *
14511 * Setup the parser context to parse a new buffer; Clears any prior
14512 * contents from the parser context. The buffer parameter must not be
14513 * NULL, but the filename parameter can be
14514 */
14515void
14516xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14517 const char* filename)
14518{
14519 xmlParserInputPtr input;
14520
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014521 if ((ctxt == NULL) || (buffer == NULL))
14522 return;
14523
Owen Taylor3473f882001-02-23 17:55:21 +000014524 input = xmlNewInputStream(ctxt);
14525 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014526 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014527 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014528 return;
14529 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014530
Owen Taylor3473f882001-02-23 17:55:21 +000014531 xmlClearParserCtxt(ctxt);
14532 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014533 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014534 input->base = buffer;
14535 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014536 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014537 inputPush(ctxt, input);
14538}
14539
14540/**
14541 * xmlSAXUserParseFile:
14542 * @sax: a SAX handler
14543 * @user_data: The user data returned on SAX callbacks
14544 * @filename: a file name
14545 *
14546 * parse an XML file and call the given SAX handler routines.
14547 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014548 *
Owen Taylor3473f882001-02-23 17:55:21 +000014549 * Returns 0 in case of success or a error number otherwise
14550 */
14551int
14552xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14553 const char *filename) {
14554 int ret = 0;
14555 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014556
Owen Taylor3473f882001-02-23 17:55:21 +000014557 ctxt = xmlCreateFileParserCtxt(filename);
14558 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014559 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014560 xmlFree(ctxt->sax);
14561 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014562 xmlDetectSAX2(ctxt);
14563
Owen Taylor3473f882001-02-23 17:55:21 +000014564 if (user_data != NULL)
14565 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014566
Owen Taylor3473f882001-02-23 17:55:21 +000014567 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014568
Owen Taylor3473f882001-02-23 17:55:21 +000014569 if (ctxt->wellFormed)
14570 ret = 0;
14571 else {
14572 if (ctxt->errNo != 0)
14573 ret = ctxt->errNo;
14574 else
14575 ret = -1;
14576 }
14577 if (sax != NULL)
14578 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014579 if (ctxt->myDoc != NULL) {
14580 xmlFreeDoc(ctxt->myDoc);
14581 ctxt->myDoc = NULL;
14582 }
Owen Taylor3473f882001-02-23 17:55:21 +000014583 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014584
Owen Taylor3473f882001-02-23 17:55:21 +000014585 return ret;
14586}
Daniel Veillard81273902003-09-30 00:43:48 +000014587#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014588
14589/************************************************************************
14590 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014591 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014592 * *
14593 ************************************************************************/
14594
14595/**
14596 * xmlCreateMemoryParserCtxt:
14597 * @buffer: a pointer to a char array
14598 * @size: the size of the array
14599 *
14600 * Create a parser context for an XML in-memory document.
14601 *
14602 * Returns the new parser context or NULL
14603 */
14604xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014605xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014606 xmlParserCtxtPtr ctxt;
14607 xmlParserInputPtr input;
14608 xmlParserInputBufferPtr buf;
14609
14610 if (buffer == NULL)
14611 return(NULL);
14612 if (size <= 0)
14613 return(NULL);
14614
14615 ctxt = xmlNewParserCtxt();
14616 if (ctxt == NULL)
14617 return(NULL);
14618
Daniel Veillard53350552003-09-18 13:35:51 +000014619 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014620 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014621 if (buf == NULL) {
14622 xmlFreeParserCtxt(ctxt);
14623 return(NULL);
14624 }
Owen Taylor3473f882001-02-23 17:55:21 +000014625
14626 input = xmlNewInputStream(ctxt);
14627 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014628 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014629 xmlFreeParserCtxt(ctxt);
14630 return(NULL);
14631 }
14632
14633 input->filename = NULL;
14634 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014635 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014636
14637 inputPush(ctxt, input);
14638 return(ctxt);
14639}
14640
Daniel Veillard81273902003-09-30 00:43:48 +000014641#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014642/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014643 * xmlSAXParseMemoryWithData:
14644 * @sax: the SAX handler block
14645 * @buffer: an pointer to a char array
14646 * @size: the size of the array
14647 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14648 * documents
14649 * @data: the userdata
14650 *
14651 * parse an XML in-memory block and use the given SAX function block
14652 * to handle the parsing callback. If sax is NULL, fallback to the default
14653 * DOM tree building routines.
14654 *
14655 * User data (void *) is stored within the parser context in the
14656 * context's _private member, so it is available nearly everywhere in libxml
14657 *
14658 * Returns the resulting document tree
14659 */
14660
14661xmlDocPtr
14662xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14663 int size, int recovery, void *data) {
14664 xmlDocPtr ret;
14665 xmlParserCtxtPtr ctxt;
14666
Daniel Veillardab2a7632009-07-09 08:45:03 +020014667 xmlInitParser();
14668
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014669 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14670 if (ctxt == NULL) return(NULL);
14671 if (sax != NULL) {
14672 if (ctxt->sax != NULL)
14673 xmlFree(ctxt->sax);
14674 ctxt->sax = sax;
14675 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014676 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014677 if (data!=NULL) {
14678 ctxt->_private=data;
14679 }
14680
Daniel Veillardadba5f12003-04-04 16:09:01 +000014681 ctxt->recovery = recovery;
14682
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014683 xmlParseDocument(ctxt);
14684
14685 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14686 else {
14687 ret = NULL;
14688 xmlFreeDoc(ctxt->myDoc);
14689 ctxt->myDoc = NULL;
14690 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014691 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014692 ctxt->sax = NULL;
14693 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014694
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014695 return(ret);
14696}
14697
14698/**
Owen Taylor3473f882001-02-23 17:55:21 +000014699 * xmlSAXParseMemory:
14700 * @sax: the SAX handler block
14701 * @buffer: an pointer to a char array
14702 * @size: the size of the array
14703 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14704 * documents
14705 *
14706 * parse an XML in-memory block and use the given SAX function block
14707 * to handle the parsing callback. If sax is NULL, fallback to the default
14708 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014709 *
Owen Taylor3473f882001-02-23 17:55:21 +000014710 * Returns the resulting document tree
14711 */
14712xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014713xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14714 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014715 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014716}
14717
14718/**
14719 * xmlParseMemory:
14720 * @buffer: an pointer to a char array
14721 * @size: the size of the array
14722 *
14723 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014724 *
Owen Taylor3473f882001-02-23 17:55:21 +000014725 * Returns the resulting document tree
14726 */
14727
Daniel Veillard50822cb2001-07-26 20:05:51 +000014728xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014729 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14730}
14731
14732/**
14733 * xmlRecoverMemory:
14734 * @buffer: an pointer to a char array
14735 * @size: the size of the array
14736 *
14737 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014738 * In the case the document is not Well Formed, an attempt to
14739 * build a tree is tried anyway
14740 *
14741 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014742 */
14743
Daniel Veillard50822cb2001-07-26 20:05:51 +000014744xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014745 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14746}
14747
14748/**
14749 * xmlSAXUserParseMemory:
14750 * @sax: a SAX handler
14751 * @user_data: The user data returned on SAX callbacks
14752 * @buffer: an in-memory XML document input
14753 * @size: the length of the XML document in bytes
14754 *
14755 * A better SAX parsing routine.
14756 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014757 *
Owen Taylor3473f882001-02-23 17:55:21 +000014758 * Returns 0 in case of success or a error number otherwise
14759 */
14760int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014761 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014762 int ret = 0;
14763 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014764
14765 xmlInitParser();
14766
Owen Taylor3473f882001-02-23 17:55:21 +000014767 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14768 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014769 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14770 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014771 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014772 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014773
Daniel Veillard30211a02001-04-26 09:33:18 +000014774 if (user_data != NULL)
14775 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014776
Owen Taylor3473f882001-02-23 17:55:21 +000014777 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014778
Owen Taylor3473f882001-02-23 17:55:21 +000014779 if (ctxt->wellFormed)
14780 ret = 0;
14781 else {
14782 if (ctxt->errNo != 0)
14783 ret = ctxt->errNo;
14784 else
14785 ret = -1;
14786 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014787 if (sax != NULL)
14788 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014789 if (ctxt->myDoc != NULL) {
14790 xmlFreeDoc(ctxt->myDoc);
14791 ctxt->myDoc = NULL;
14792 }
Owen Taylor3473f882001-02-23 17:55:21 +000014793 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014794
Owen Taylor3473f882001-02-23 17:55:21 +000014795 return ret;
14796}
Daniel Veillard81273902003-09-30 00:43:48 +000014797#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014798
14799/**
14800 * xmlCreateDocParserCtxt:
14801 * @cur: a pointer to an array of xmlChar
14802 *
14803 * Creates a parser context for an XML in-memory document.
14804 *
14805 * Returns the new parser context or NULL
14806 */
14807xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014808xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014809 int len;
14810
14811 if (cur == NULL)
14812 return(NULL);
14813 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014814 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014815}
14816
Daniel Veillard81273902003-09-30 00:43:48 +000014817#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014818/**
14819 * xmlSAXParseDoc:
14820 * @sax: the SAX handler block
14821 * @cur: a pointer to an array of xmlChar
14822 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14823 * documents
14824 *
14825 * parse an XML in-memory document and build a tree.
14826 * It use the given SAX function block to handle the parsing callback.
14827 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014828 *
Owen Taylor3473f882001-02-23 17:55:21 +000014829 * Returns the resulting document tree
14830 */
14831
14832xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014833xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014834 xmlDocPtr ret;
14835 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014836 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014837
Daniel Veillard38936062004-11-04 17:45:11 +000014838 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014839
14840
14841 ctxt = xmlCreateDocParserCtxt(cur);
14842 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014843 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014844 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014845 ctxt->sax = sax;
14846 ctxt->userData = NULL;
14847 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014848 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014849
14850 xmlParseDocument(ctxt);
14851 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14852 else {
14853 ret = NULL;
14854 xmlFreeDoc(ctxt->myDoc);
14855 ctxt->myDoc = NULL;
14856 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014857 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014858 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014859 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014860
Owen Taylor3473f882001-02-23 17:55:21 +000014861 return(ret);
14862}
14863
14864/**
14865 * xmlParseDoc:
14866 * @cur: a pointer to an array of xmlChar
14867 *
14868 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014869 *
Owen Taylor3473f882001-02-23 17:55:21 +000014870 * Returns the resulting document tree
14871 */
14872
14873xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014874xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014875 return(xmlSAXParseDoc(NULL, cur, 0));
14876}
Daniel Veillard81273902003-09-30 00:43:48 +000014877#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014878
Daniel Veillard81273902003-09-30 00:43:48 +000014879#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014880/************************************************************************
14881 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014882 * Specific function to keep track of entities references *
14883 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014884 * *
14885 ************************************************************************/
14886
14887static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14888
14889/**
14890 * xmlAddEntityReference:
14891 * @ent : A valid entity
14892 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014893 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014894 *
14895 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14896 */
14897static void
14898xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14899 xmlNodePtr lastNode)
14900{
14901 if (xmlEntityRefFunc != NULL) {
14902 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14903 }
14904}
14905
14906
14907/**
14908 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014909 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014910 *
14911 * Set the function to call call back when a xml reference has been made
14912 */
14913void
14914xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14915{
14916 xmlEntityRefFunc = func;
14917}
Daniel Veillard81273902003-09-30 00:43:48 +000014918#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014919
14920/************************************************************************
14921 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014922 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014923 * *
14924 ************************************************************************/
14925
14926#ifdef LIBXML_XPATH_ENABLED
14927#include <libxml/xpath.h>
14928#endif
14929
Daniel Veillardffa3c742005-07-21 13:24:09 +000014930extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014931static int xmlParserInitialized = 0;
14932
14933/**
14934 * xmlInitParser:
14935 *
14936 * Initialization function for the XML parser.
14937 * This is not reentrant. Call once before processing in case of
14938 * use in multithreaded programs.
14939 */
14940
14941void
14942xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014943 if (xmlParserInitialized != 0)
14944 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014945
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014946#ifdef LIBXML_THREAD_ENABLED
14947 __xmlGlobalInitMutexLock();
14948 if (xmlParserInitialized == 0) {
14949#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014950 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014951 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014952 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14953 (xmlGenericError == NULL))
14954 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014955 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014956 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014957 xmlInitCharEncodingHandlers();
14958 xmlDefaultSAXHandlerInit();
14959 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014960#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014961 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014962#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014963#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014964 htmlInitAutoClose();
14965 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014966#endif
14967#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014968 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014969#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014970 xmlParserInitialized = 1;
14971#ifdef LIBXML_THREAD_ENABLED
14972 }
14973 __xmlGlobalInitMutexUnlock();
14974#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014975}
14976
14977/**
14978 * xmlCleanupParser:
14979 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014980 * This function name is somewhat misleading. It does not clean up
14981 * parser state, it cleans up memory allocated by the library itself.
14982 * It is a cleanup function for the XML library. It tries to reclaim all
14983 * related global memory allocated for the library processing.
14984 * It doesn't deallocate any document related memory. One should
14985 * call xmlCleanupParser() only when the process has finished using
14986 * the library and all XML/HTML documents built with it.
14987 * See also xmlInitParser() which has the opposite function of preparing
14988 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014989 *
14990 * WARNING: if your application is multithreaded or has plugin support
14991 * calling this may crash the application if another thread or
14992 * a plugin is still using libxml2. It's sometimes very hard to
14993 * guess if libxml2 is in use in the application, some libraries
14994 * or plugins may use it without notice. In case of doubt abstain
14995 * from calling this function or do it just before calling exit()
14996 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014997 */
14998
14999void
15000xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000015001 if (!xmlParserInitialized)
15002 return;
15003
Owen Taylor3473f882001-02-23 17:55:21 +000015004 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000015005#ifdef LIBXML_CATALOG_ENABLED
15006 xmlCatalogCleanup();
15007#endif
Daniel Veillard14412512005-01-21 23:53:26 +000015008 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000015009 xmlCleanupInputCallbacks();
15010#ifdef LIBXML_OUTPUT_ENABLED
15011 xmlCleanupOutputCallbacks();
15012#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015013#ifdef LIBXML_SCHEMAS_ENABLED
15014 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000015015 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015016#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000015017 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080015018 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000015019 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000015020 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000015021 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000015022}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015023
15024/************************************************************************
15025 * *
15026 * New set (2.6.0) of simpler and more flexible APIs *
15027 * *
15028 ************************************************************************/
15029
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely as the branch of an
 * if/else. The caller supplies the terminating semicolon.
 * Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
15041
15042/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 * xmlCtxtReset:
15044 * @ctxt: an XML parser context
15045 *
15046 * Reset a parser context
15047 */
15048void
15049xmlCtxtReset(xmlParserCtxtPtr ctxt)
15050{
15051 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015052 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015053
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015054 if (ctxt == NULL)
15055 return;
15056
15057 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015058
15059 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15060 xmlFreeInputStream(input);
15061 }
15062 ctxt->inputNr = 0;
15063 ctxt->input = NULL;
15064
15065 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000015066 if (ctxt->spaceTab != NULL) {
15067 ctxt->spaceTab[0] = -1;
15068 ctxt->space = &ctxt->spaceTab[0];
15069 } else {
15070 ctxt->space = NULL;
15071 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015072
15073
15074 ctxt->nodeNr = 0;
15075 ctxt->node = NULL;
15076
15077 ctxt->nameNr = 0;
15078 ctxt->name = NULL;
15079
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015080 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015081 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015082 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015083 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015084 DICT_FREE(ctxt->directory);
15085 ctxt->directory = NULL;
15086 DICT_FREE(ctxt->extSubURI);
15087 ctxt->extSubURI = NULL;
15088 DICT_FREE(ctxt->extSubSystem);
15089 ctxt->extSubSystem = NULL;
15090 if (ctxt->myDoc != NULL)
15091 xmlFreeDoc(ctxt->myDoc);
15092 ctxt->myDoc = NULL;
15093
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015094 ctxt->standalone = -1;
15095 ctxt->hasExternalSubset = 0;
15096 ctxt->hasPErefs = 0;
15097 ctxt->html = 0;
15098 ctxt->external = 0;
15099 ctxt->instate = XML_PARSER_START;
15100 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015101
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015102 ctxt->wellFormed = 1;
15103 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000015104 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015105 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015106#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015107 ctxt->vctxt.userData = ctxt;
15108 ctxt->vctxt.error = xmlParserValidityError;
15109 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015110#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015111 ctxt->record_info = 0;
15112 ctxt->nbChars = 0;
15113 ctxt->checkIndex = 0;
15114 ctxt->inSubset = 0;
15115 ctxt->errNo = XML_ERR_OK;
15116 ctxt->depth = 0;
15117 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15118 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000015119 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000015120 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080015121 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015122 xmlInitNodeInfoSeq(&ctxt->node_seq);
15123
15124 if (ctxt->attsDefault != NULL) {
15125 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15126 ctxt->attsDefault = NULL;
15127 }
15128 if (ctxt->attsSpecial != NULL) {
15129 xmlHashFree(ctxt->attsSpecial, NULL);
15130 ctxt->attsSpecial = NULL;
15131 }
15132
Daniel Veillard4432df22003-09-28 18:58:27 +000015133#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015134 if (ctxt->catalogs != NULL)
15135 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000015136#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000015137 if (ctxt->lastError.code != XML_ERR_OK)
15138 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015139}
15140
15141/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015142 * xmlCtxtResetPush:
15143 * @ctxt: an XML parser context
15144 * @chunk: a pointer to an array of chars
15145 * @size: number of chars in the array
15146 * @filename: an optional file name or URI
15147 * @encoding: the document encoding, or NULL
15148 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015149 * Reset a push parser context
15150 *
15151 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015152 */
15153int
15154xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15155 int size, const char *filename, const char *encoding)
15156{
15157 xmlParserInputPtr inputStream;
15158 xmlParserInputBufferPtr buf;
15159 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15160
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015161 if (ctxt == NULL)
15162 return(1);
15163
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015164 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15165 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15166
15167 buf = xmlAllocParserInputBuffer(enc);
15168 if (buf == NULL)
15169 return(1);
15170
15171 if (ctxt == NULL) {
15172 xmlFreeParserInputBuffer(buf);
15173 return(1);
15174 }
15175
15176 xmlCtxtReset(ctxt);
15177
15178 if (ctxt->pushTab == NULL) {
15179 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15180 sizeof(xmlChar *));
15181 if (ctxt->pushTab == NULL) {
15182 xmlErrMemory(ctxt, NULL);
15183 xmlFreeParserInputBuffer(buf);
15184 return(1);
15185 }
15186 }
15187
15188 if (filename == NULL) {
15189 ctxt->directory = NULL;
15190 } else {
15191 ctxt->directory = xmlParserGetDirectory(filename);
15192 }
15193
15194 inputStream = xmlNewInputStream(ctxt);
15195 if (inputStream == NULL) {
15196 xmlFreeParserInputBuffer(buf);
15197 return(1);
15198 }
15199
15200 if (filename == NULL)
15201 inputStream->filename = NULL;
15202 else
15203 inputStream->filename = (char *)
15204 xmlCanonicPath((const xmlChar *) filename);
15205 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080015206 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015207
15208 inputPush(ctxt, inputStream);
15209
15210 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15211 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015212 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15213 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015214
15215 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15216
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015217 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015218#ifdef DEBUG_PUSH
15219 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15220#endif
15221 }
15222
15223 if (encoding != NULL) {
15224 xmlCharEncodingHandlerPtr hdlr;
15225
Daniel Veillard37334572008-07-31 08:20:02 +000015226 if (ctxt->encoding != NULL)
15227 xmlFree((xmlChar *) ctxt->encoding);
15228 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15229
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015230 hdlr = xmlFindCharEncodingHandler(encoding);
15231 if (hdlr != NULL) {
15232 xmlSwitchToEncoding(ctxt, hdlr);
15233 } else {
15234 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15235 "Unsupported encoding %s\n", BAD_CAST encoding);
15236 }
15237 } else if (enc != XML_CHAR_ENCODING_NONE) {
15238 xmlSwitchEncoding(ctxt, enc);
15239 }
15240
15241 return(0);
15242}
15243
Daniel Veillard37334572008-07-31 08:20:02 +000015244
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015245/**
Daniel Veillard37334572008-07-31 08:20:02 +000015246 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015248 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015249 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015250 *
15251 * Applies the options to the parser context
15252 *
15253 * Returns 0 in case of success, the set of unknown or unimplemented options
15254 * in case of error.
15255 */
Daniel Veillard37334572008-07-31 08:20:02 +000015256static int
15257xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015258{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015259 if (ctxt == NULL)
15260 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015261 if (encoding != NULL) {
15262 if (ctxt->encoding != NULL)
15263 xmlFree((xmlChar *) ctxt->encoding);
15264 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15265 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266 if (options & XML_PARSE_RECOVER) {
15267 ctxt->recovery = 1;
15268 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015269 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015270 } else
15271 ctxt->recovery = 0;
15272 if (options & XML_PARSE_DTDLOAD) {
15273 ctxt->loadsubset = XML_DETECT_IDS;
15274 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015275 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015276 } else
15277 ctxt->loadsubset = 0;
15278 if (options & XML_PARSE_DTDATTR) {
15279 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15280 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015281 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015282 }
15283 if (options & XML_PARSE_NOENT) {
15284 ctxt->replaceEntities = 1;
15285 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15286 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015287 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015288 } else
15289 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015290 if (options & XML_PARSE_PEDANTIC) {
15291 ctxt->pedantic = 1;
15292 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015293 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015294 } else
15295 ctxt->pedantic = 0;
15296 if (options & XML_PARSE_NOBLANKS) {
15297 ctxt->keepBlanks = 0;
15298 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15299 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015300 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015301 } else
15302 ctxt->keepBlanks = 1;
15303 if (options & XML_PARSE_DTDVALID) {
15304 ctxt->validate = 1;
15305 if (options & XML_PARSE_NOWARNING)
15306 ctxt->vctxt.warning = NULL;
15307 if (options & XML_PARSE_NOERROR)
15308 ctxt->vctxt.error = NULL;
15309 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015310 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015311 } else
15312 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015313 if (options & XML_PARSE_NOWARNING) {
15314 ctxt->sax->warning = NULL;
15315 options -= XML_PARSE_NOWARNING;
15316 }
15317 if (options & XML_PARSE_NOERROR) {
15318 ctxt->sax->error = NULL;
15319 ctxt->sax->fatalError = NULL;
15320 options -= XML_PARSE_NOERROR;
15321 }
Daniel Veillard81273902003-09-30 00:43:48 +000015322#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015323 if (options & XML_PARSE_SAX1) {
15324 ctxt->sax->startElement = xmlSAX2StartElement;
15325 ctxt->sax->endElement = xmlSAX2EndElement;
15326 ctxt->sax->startElementNs = NULL;
15327 ctxt->sax->endElementNs = NULL;
15328 ctxt->sax->initialized = 1;
15329 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015330 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015331 }
Daniel Veillard81273902003-09-30 00:43:48 +000015332#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015333 if (options & XML_PARSE_NODICT) {
15334 ctxt->dictNames = 0;
15335 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015336 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015337 } else {
15338 ctxt->dictNames = 1;
15339 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015340 if (options & XML_PARSE_NOCDATA) {
15341 ctxt->sax->cdataBlock = NULL;
15342 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015343 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015344 }
15345 if (options & XML_PARSE_NSCLEAN) {
15346 ctxt->options |= XML_PARSE_NSCLEAN;
15347 options -= XML_PARSE_NSCLEAN;
15348 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015349 if (options & XML_PARSE_NONET) {
15350 ctxt->options |= XML_PARSE_NONET;
15351 options -= XML_PARSE_NONET;
15352 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015353 if (options & XML_PARSE_COMPACT) {
15354 ctxt->options |= XML_PARSE_COMPACT;
15355 options -= XML_PARSE_COMPACT;
15356 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015357 if (options & XML_PARSE_OLD10) {
15358 ctxt->options |= XML_PARSE_OLD10;
15359 options -= XML_PARSE_OLD10;
15360 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015361 if (options & XML_PARSE_NOBASEFIX) {
15362 ctxt->options |= XML_PARSE_NOBASEFIX;
15363 options -= XML_PARSE_NOBASEFIX;
15364 }
15365 if (options & XML_PARSE_HUGE) {
15366 ctxt->options |= XML_PARSE_HUGE;
15367 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015368 if (ctxt->dict != NULL)
15369 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015370 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015371 if (options & XML_PARSE_OLDSAX) {
15372 ctxt->options |= XML_PARSE_OLDSAX;
15373 options -= XML_PARSE_OLDSAX;
15374 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015375 if (options & XML_PARSE_IGNORE_ENC) {
15376 ctxt->options |= XML_PARSE_IGNORE_ENC;
15377 options -= XML_PARSE_IGNORE_ENC;
15378 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015379 if (options & XML_PARSE_BIG_LINES) {
15380 ctxt->options |= XML_PARSE_BIG_LINES;
15381 options -= XML_PARSE_BIG_LINES;
15382 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015383 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015384 return (options);
15385}
15386
15387/**
Daniel Veillard37334572008-07-31 08:20:02 +000015388 * xmlCtxtUseOptions:
15389 * @ctxt: an XML parser context
15390 * @options: a combination of xmlParserOption
15391 *
15392 * Applies the options to the parser context
15393 *
15394 * Returns 0 in case of success, the set of unknown or unimplemented options
15395 * in case of error.
15396 */
15397int
15398xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15399{
15400 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15401}
15402
15403/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015404 * xmlDoRead:
15405 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015406 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015407 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015408 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015409 * @reuse: keep the context for reuse
15410 *
15411 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015412 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015413 * Returns the resulting document tree or NULL
15414 */
15415static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015416xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15417 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015418{
15419 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015420
15421 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015422 if (encoding != NULL) {
15423 xmlCharEncodingHandlerPtr hdlr;
15424
15425 hdlr = xmlFindCharEncodingHandler(encoding);
15426 if (hdlr != NULL)
15427 xmlSwitchToEncoding(ctxt, hdlr);
15428 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015429 if ((URL != NULL) && (ctxt->input != NULL) &&
15430 (ctxt->input->filename == NULL))
15431 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015432 xmlParseDocument(ctxt);
15433 if ((ctxt->wellFormed) || ctxt->recovery)
15434 ret = ctxt->myDoc;
15435 else {
15436 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015437 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015438 xmlFreeDoc(ctxt->myDoc);
15439 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015440 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015441 ctxt->myDoc = NULL;
15442 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015443 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015444 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015445
15446 return (ret);
15447}
15448
15449/**
15450 * xmlReadDoc:
15451 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015452 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015453 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015454 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015455 *
15456 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015457 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015458 * Returns the resulting document tree
15459 */
15460xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015461xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015462{
15463 xmlParserCtxtPtr ctxt;
15464
15465 if (cur == NULL)
15466 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015467 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015468
15469 ctxt = xmlCreateDocParserCtxt(cur);
15470 if (ctxt == NULL)
15471 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015472 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015473}
15474
15475/**
15476 * xmlReadFile:
15477 * @filename: a file or URL
15478 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015479 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015480 *
15481 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015482 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015483 * Returns the resulting document tree
15484 */
15485xmlDocPtr
15486xmlReadFile(const char *filename, const char *encoding, int options)
15487{
15488 xmlParserCtxtPtr ctxt;
15489
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015490 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015491 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015492 if (ctxt == NULL)
15493 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015494 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015495}
15496
15497/**
15498 * xmlReadMemory:
15499 * @buffer: a pointer to a char array
15500 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015501 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015502 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015503 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015504 *
15505 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015506 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015507 * Returns the resulting document tree
15508 */
15509xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015510xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015511{
15512 xmlParserCtxtPtr ctxt;
15513
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015514 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015515 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15516 if (ctxt == NULL)
15517 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015518 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015519}
15520
15521/**
15522 * xmlReadFd:
15523 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015524 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015525 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015526 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015527 *
15528 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015529 * NOTE that the file descriptor will not be closed when the
15530 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015531 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015532 * Returns the resulting document tree
15533 */
15534xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015535xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015536{
15537 xmlParserCtxtPtr ctxt;
15538 xmlParserInputBufferPtr input;
15539 xmlParserInputPtr stream;
15540
15541 if (fd < 0)
15542 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015543 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015544
15545 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15546 if (input == NULL)
15547 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015548 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015549 ctxt = xmlNewParserCtxt();
15550 if (ctxt == NULL) {
15551 xmlFreeParserInputBuffer(input);
15552 return (NULL);
15553 }
15554 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15555 if (stream == NULL) {
15556 xmlFreeParserInputBuffer(input);
15557 xmlFreeParserCtxt(ctxt);
15558 return (NULL);
15559 }
15560 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015561 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015562}
15563
15564/**
15565 * xmlReadIO:
15566 * @ioread: an I/O read function
15567 * @ioclose: an I/O close function
15568 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015569 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015570 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015571 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015572 *
15573 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015574 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015575 * Returns the resulting document tree
15576 */
15577xmlDocPtr
15578xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015579 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015580{
15581 xmlParserCtxtPtr ctxt;
15582 xmlParserInputBufferPtr input;
15583 xmlParserInputPtr stream;
15584
15585 if (ioread == NULL)
15586 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015587 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015588
15589 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15590 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015591 if (input == NULL) {
15592 if (ioclose != NULL)
15593 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015594 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015595 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015596 ctxt = xmlNewParserCtxt();
15597 if (ctxt == NULL) {
15598 xmlFreeParserInputBuffer(input);
15599 return (NULL);
15600 }
15601 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15602 if (stream == NULL) {
15603 xmlFreeParserInputBuffer(input);
15604 xmlFreeParserCtxt(ctxt);
15605 return (NULL);
15606 }
15607 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015608 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015609}
15610
15611/**
15612 * xmlCtxtReadDoc:
15613 * @ctxt: an XML parser context
15614 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015615 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015616 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015617 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015618 *
15619 * parse an XML in-memory document and build a tree.
15620 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015621 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015622 * Returns the resulting document tree
15623 */
15624xmlDocPtr
15625xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015626 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015627{
15628 xmlParserInputPtr stream;
15629
15630 if (cur == NULL)
15631 return (NULL);
15632 if (ctxt == NULL)
15633 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015634 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015635
15636 xmlCtxtReset(ctxt);
15637
15638 stream = xmlNewStringInputStream(ctxt, cur);
15639 if (stream == NULL) {
15640 return (NULL);
15641 }
15642 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015643 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015644}
15645
15646/**
15647 * xmlCtxtReadFile:
15648 * @ctxt: an XML parser context
15649 * @filename: a file or URL
15650 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015651 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015652 *
15653 * parse an XML file from the filesystem or the network.
15654 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015655 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015656 * Returns the resulting document tree
15657 */
15658xmlDocPtr
15659xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15660 const char *encoding, int options)
15661{
15662 xmlParserInputPtr stream;
15663
15664 if (filename == NULL)
15665 return (NULL);
15666 if (ctxt == NULL)
15667 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015668 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015669
15670 xmlCtxtReset(ctxt);
15671
Daniel Veillard29614c72004-11-26 10:47:26 +000015672 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015673 if (stream == NULL) {
15674 return (NULL);
15675 }
15676 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015677 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015678}
15679
15680/**
15681 * xmlCtxtReadMemory:
15682 * @ctxt: an XML parser context
15683 * @buffer: a pointer to a char array
15684 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015685 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015686 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015687 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015688 *
15689 * parse an XML in-memory document and build a tree.
15690 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015691 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015692 * Returns the resulting document tree
15693 */
15694xmlDocPtr
15695xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015696 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015697{
15698 xmlParserInputBufferPtr input;
15699 xmlParserInputPtr stream;
15700
15701 if (ctxt == NULL)
15702 return (NULL);
15703 if (buffer == NULL)
15704 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015705 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015706
15707 xmlCtxtReset(ctxt);
15708
15709 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15710 if (input == NULL) {
15711 return(NULL);
15712 }
15713
15714 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15715 if (stream == NULL) {
15716 xmlFreeParserInputBuffer(input);
15717 return(NULL);
15718 }
15719
15720 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015721 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015722}
15723
15724/**
15725 * xmlCtxtReadFd:
15726 * @ctxt: an XML parser context
15727 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015728 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015729 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015730 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015731 *
15732 * parse an XML from a file descriptor and build a tree.
15733 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015734 * NOTE that the file descriptor will not be closed when the
15735 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015736 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015737 * Returns the resulting document tree
15738 */
15739xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015740xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15741 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015742{
15743 xmlParserInputBufferPtr input;
15744 xmlParserInputPtr stream;
15745
15746 if (fd < 0)
15747 return (NULL);
15748 if (ctxt == NULL)
15749 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015750 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015751
15752 xmlCtxtReset(ctxt);
15753
15754
15755 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15756 if (input == NULL)
15757 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015758 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015759 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15760 if (stream == NULL) {
15761 xmlFreeParserInputBuffer(input);
15762 return (NULL);
15763 }
15764 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015765 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015766}
15767
15768/**
15769 * xmlCtxtReadIO:
15770 * @ctxt: an XML parser context
15771 * @ioread: an I/O read function
15772 * @ioclose: an I/O close function
15773 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015774 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015775 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015776 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015777 *
15778 * parse an XML document from I/O functions and source and build a tree.
15779 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015780 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015781 * Returns the resulting document tree
15782 */
15783xmlDocPtr
15784xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15785 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015786 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015787 const char *encoding, int options)
15788{
15789 xmlParserInputBufferPtr input;
15790 xmlParserInputPtr stream;
15791
15792 if (ioread == NULL)
15793 return (NULL);
15794 if (ctxt == NULL)
15795 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015796 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015797
15798 xmlCtxtReset(ctxt);
15799
15800 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15801 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015802 if (input == NULL) {
15803 if (ioclose != NULL)
15804 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015805 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015806 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015807 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15808 if (stream == NULL) {
15809 xmlFreeParserInputBuffer(input);
15810 return (NULL);
15811 }
15812 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015813 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015814}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015815
15816#define bottom_parser
15817#include "elfgcchack.h"