blob: 0677030c2d9607eea36c424bf4f42f81d22245f2 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080020 * different ranges of character are actually implanted either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080097static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
Daniel Veillard0161e632008-08-28 15:36:32 +000099/************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
105#define XML_PARSER_BIG_ENTITY 1000
106#define XML_PARSER_LOT_ENTITY 5000
107
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
114#define XML_PARSER_NON_LINEAR 10
115
116/*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800127 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000128{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800129 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0)) {
142 unsigned long oldnbent = ctxt->nbentities;
143 xmlChar *rep;
144
145 ent->checked = 1;
146
147 rep = xmlStringDecodeEntities(ctxt, ent->content,
148 XML_SUBSTITUTE_REF, 0, 0, 0);
149
150 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
151 if (rep != NULL) {
152 if (xmlStrchr(rep, '<'))
153 ent->checked |= 1;
154 xmlFree(rep);
155 rep = NULL;
156 }
157 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800158 if (replacement != 0) {
159 if (replacement < XML_MAX_TEXT_LENGTH)
160 return(0);
161
162 /*
163 * If the volume of entity copy reaches 10 times the
164 * amount of parsed data and over the large text threshold
165 * then that's very likely to be an abuse.
166 */
167 if (ctxt->input != NULL) {
168 consumed = ctxt->input->consumed +
169 (ctxt->input->cur - ctxt->input->base);
170 }
171 consumed += ctxt->sizeentities;
172
173 if (replacement < XML_PARSER_NON_LINEAR * consumed)
174 return(0);
175 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000176 /*
177 * Do the check based on the replacement size of the entity
178 */
179 if (size < XML_PARSER_BIG_ENTITY)
180 return(0);
181
182 /*
183 * A limit on the amount of text data reasonably used
184 */
185 if (ctxt->input != NULL) {
186 consumed = ctxt->input->consumed +
187 (ctxt->input->cur - ctxt->input->base);
188 }
189 consumed += ctxt->sizeentities;
190
191 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
192 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
193 return (0);
194 } else if (ent != NULL) {
195 /*
196 * use the number of parsed entities in the replacement
197 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800198 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000199
200 /*
201 * The amount of data parsed counting entities size only once
202 */
203 if (ctxt->input != NULL) {
204 consumed = ctxt->input->consumed +
205 (ctxt->input->cur - ctxt->input->base);
206 }
207 consumed += ctxt->sizeentities;
208
209 /*
210 * Check the density of entities for the amount of data
211 * knowing an entity reference will take at least 3 bytes
212 */
213 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
214 return (0);
215 } else {
216 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800217 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000218 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800219 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
220 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
221 (ctxt->nbentities <= 10000))
222 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000223 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000224 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
225 return (1);
226}
227
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to be
 * processed. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000236unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000237
Daniel Veillard0fb18932003-09-07 09:14:37 +0000238
Daniel Veillard0161e632008-08-28 15:36:32 +0000239
240#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000241#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000242#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000243#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
244
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
254#define XML_PARSER_CHUNK_SIZE 100
255
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is terminated by a NULL entry and is never modified, so both
 * the strings and the pointers to them are const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
265
Daniel Veillarda07050d2003-10-19 14:46:32 +0000266
Owen Taylor3473f882001-02-23 17:55:21 +0000267/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200268static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
269 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000270
Daniel Veillard7d515752003-09-26 19:12:37 +0000271static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000272xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
273 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000274 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000275 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000276
Daniel Veillard37334572008-07-31 08:20:02 +0000277static int
278xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
279 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000280#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000281static void
282xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
283 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000284#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000285
Daniel Veillard7d515752003-09-26 19:12:37 +0000286static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000287xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
288 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000289
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000290static int
291xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
292
Daniel Veillarde57ec792003-09-10 10:50:59 +0000293/************************************************************************
294 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800295 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 * *
297 ************************************************************************/
298
299/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 * xmlErrAttributeDup:
301 * @ctxt: an XML parser context
302 * @prefix: the attribute prefix
303 * @localname: the attribute localname
304 *
305 * Handle a redefinition of attribute error
306 */
307static void
308xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
309 const xmlChar * localname)
310{
Daniel Veillard157fee02003-10-31 10:36:03 +0000311 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
312 (ctxt->instate == XML_PARSER_EOF))
313 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000314 if (ctxt != NULL)
315 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200316
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000317 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000318 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200319 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 (const char *) localname, NULL, NULL, 0, 0,
321 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000322 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000323 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200324 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 (const char *) prefix, (const char *) localname,
326 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
327 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000328 if (ctxt != NULL) {
329 ctxt->wellFormed = 0;
330 if (ctxt->recovery == 0)
331 ctxt->disableSAX = 1;
332 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333}
334
335/**
336 * xmlFatalErr:
337 * @ctxt: an XML parser context
338 * @error: the error number
339 * @extra: extra information string
340 *
341 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
342 */
343static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345{
346 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348
Daniel Veillard157fee02003-10-31 10:36:03 +0000349 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
350 (ctxt->instate == XML_PARSER_EOF))
351 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 switch (error) {
353 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800354 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800357 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800360 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "internal error";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800366 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800369 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800372 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800375 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800378 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800381 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800384 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800387 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000389 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800390 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000392 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800393 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800396 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800399 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800402 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000403 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000404 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800405 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800408 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800411 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800414 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800417 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800420 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800423 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800426 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "Fragment not allowed";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800441 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800444 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800447 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800450 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800453 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
468 case XML_ERR_CONDSEC_INVALID_KEYWORD:
469 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800470 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000471 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000472 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800473 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000474 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000475 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800476 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000477 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000478 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800479 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000480 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800482 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000484 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800485 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000487 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000489 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000490 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800491 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000493 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800494 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000496 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000499 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800503 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000504 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000507 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800509 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000510 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000511 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800512 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000513 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000514 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800515 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000516 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000517 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800518 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000520 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800521 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000522 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800523 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800524 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800525 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000526#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800528 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000529 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000530#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800532 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000533 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800534 if (info == NULL)
535 snprintf(errstr, 128, "%s\n", errmsg);
536 else
537 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000538 if (ctxt != NULL)
539 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000540 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800541 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000543 if (ctxt != NULL) {
544 ctxt->wellFormed = 0;
545 if (ctxt->recovery == 0)
546 ctxt->disableSAX = 1;
547 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000548}
549
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000550/**
551 * xmlFatalErrMsg:
552 * @ctxt: an XML parser context
553 * @error: the error number
554 * @msg: the error message
555 *
556 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
557 */
558static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000559xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000561{
Daniel Veillard157fee02003-10-31 10:36:03 +0000562 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
563 (ctxt->instate == XML_PARSER_EOF))
564 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000565 if (ctxt != NULL)
566 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000567 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200568 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000569 if (ctxt != NULL) {
570 ctxt->wellFormed = 0;
571 if (ctxt->recovery == 0)
572 ctxt->disableSAX = 1;
573 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574}
575
576/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000577 * xmlWarningMsg:
578 * @ctxt: an XML parser context
579 * @error: the error number
580 * @msg: the error message
581 * @str1: extra data
582 * @str2: extra data
583 *
584 * Handle a warning.
585 */
586static void
587xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
588 const char *msg, const xmlChar *str1, const xmlChar *str2)
589{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000590 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000591
Daniel Veillard157fee02003-10-31 10:36:03 +0000592 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
593 (ctxt->instate == XML_PARSER_EOF))
594 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000595 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
596 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000597 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200598 if (ctxt != NULL) {
599 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000600 (ctxt->sax) ? ctxt->sax->warning : NULL,
601 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000602 ctxt, NULL, XML_FROM_PARSER, error,
603 XML_ERR_WARNING, NULL, 0,
604 (const char *) str1, (const char *) str2, NULL, 0, 0,
605 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200606 } else {
607 __xmlRaiseError(schannel, NULL, NULL,
608 ctxt, NULL, XML_FROM_PARSER, error,
609 XML_ERR_WARNING, NULL, 0,
610 (const char *) str1, (const char *) str2, NULL, 0, 0,
611 msg, (const char *) str1, (const char *) str2);
612 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613}
614
615/**
616 * xmlValidityError:
617 * @ctxt: an XML parser context
618 * @error: the error number
619 * @msg: the error message
620 * @str1: extra data
621 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000622 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000623 */
624static void
625xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000626 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000627{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000628 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000629
630 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
631 (ctxt->instate == XML_PARSER_EOF))
632 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000633 if (ctxt != NULL) {
634 ctxt->errNo = error;
635 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
636 schannel = ctxt->sax->serror;
637 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200638 if (ctxt != NULL) {
639 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000640 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000641 ctxt, NULL, XML_FROM_DTD, error,
642 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000643 (const char *) str2, NULL, 0, 0,
644 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200646 } else {
647 __xmlRaiseError(schannel, NULL, NULL,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000652 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000653}
654
655/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000656 * xmlFatalErrMsgInt:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @val: an integer value
661 *
662 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
663 */
664static void
665xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000666 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000667{
Daniel Veillard157fee02003-10-31 10:36:03 +0000668 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
669 (ctxt->instate == XML_PARSER_EOF))
670 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000671 if (ctxt != NULL)
672 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000673 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000674 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
675 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000676 if (ctxt != NULL) {
677 ctxt->wellFormed = 0;
678 if (ctxt->recovery == 0)
679 ctxt->disableSAX = 1;
680 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000681}
682
683/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000684 * xmlFatalErrMsgStrIntStr:
685 * @ctxt: an XML parser context
686 * @error: the error number
687 * @msg: the error message
688 * @str1: an string info
689 * @val: an integer value
690 * @str2: an string info
691 *
692 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
693 */
694static void
695xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800696 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000697 const xmlChar *str2)
698{
Daniel Veillard157fee02003-10-31 10:36:03 +0000699 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
700 (ctxt->instate == XML_PARSER_EOF))
701 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000702 if (ctxt != NULL)
703 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000704 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000705 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
706 NULL, 0, (const char *) str1, (const char *) str2,
707 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000708 if (ctxt != NULL) {
709 ctxt->wellFormed = 0;
710 if (ctxt->recovery == 0)
711 ctxt->disableSAX = 1;
712 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000713}
714
715/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000716 * xmlFatalErrMsgStr:
717 * @ctxt: an XML parser context
718 * @error: the error number
719 * @msg: the error message
720 * @val: a string value
721 *
722 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
723 */
724static void
725xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000726 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000727{
Daniel Veillard157fee02003-10-31 10:36:03 +0000728 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
729 (ctxt->instate == XML_PARSER_EOF))
730 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000731 if (ctxt != NULL)
732 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000733 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000734 XML_FROM_PARSER, error, XML_ERR_FATAL,
735 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
736 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000737 if (ctxt != NULL) {
738 ctxt->wellFormed = 0;
739 if (ctxt->recovery == 0)
740 ctxt->disableSAX = 1;
741 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000742}
743
744/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000745 * xmlErrMsgStr:
746 * @ctxt: an XML parser context
747 * @error: the error number
748 * @msg: the error message
749 * @val: a string value
750 *
751 * Handle a non fatal parser error
752 */
753static void
754xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
755 const char *msg, const xmlChar * val)
756{
Daniel Veillard157fee02003-10-31 10:36:03 +0000757 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
758 (ctxt->instate == XML_PARSER_EOF))
759 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000760 if (ctxt != NULL)
761 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000762 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000763 XML_FROM_PARSER, error, XML_ERR_ERROR,
764 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
765 val);
766}
767
768/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000769 * xmlNsErr:
770 * @ctxt: an XML parser context
771 * @error: the error number
772 * @msg: the message
773 * @info1: extra information string
774 * @info2: extra information string
775 *
776 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
777 */
778static void
779xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
780 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000781 const xmlChar * info1, const xmlChar * info2,
782 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000783{
Daniel Veillard157fee02003-10-31 10:36:03 +0000784 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
785 (ctxt->instate == XML_PARSER_EOF))
786 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000787 if (ctxt != NULL)
788 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000789 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000790 XML_ERR_ERROR, NULL, 0, (const char *) info1,
791 (const char *) info2, (const char *) info3, 0, 0, msg,
792 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000793 if (ctxt != NULL)
794 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000795}
796
Daniel Veillard37334572008-07-31 08:20:02 +0000797/**
798 * xmlNsWarn
799 * @ctxt: an XML parser context
800 * @error: the error number
801 * @msg: the message
802 * @info1: extra information string
803 * @info2: extra information string
804 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800805 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000806 */
807static void
808xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
809 const char *msg,
810 const xmlChar * info1, const xmlChar * info2,
811 const xmlChar * info3)
812{
813 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
814 (ctxt->instate == XML_PARSER_EOF))
815 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000816 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
817 XML_ERR_WARNING, NULL, 0, (const char *) info1,
818 (const char *) info2, (const char *) info3, 0, 0, msg,
819 info1, info2, info3);
820}
821
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000822/************************************************************************
823 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800824 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000825 * *
826 ************************************************************************/
827
828/**
829 * xmlHasFeature:
830 * @feature: the feature to be examined
831 *
832 * Examines if the library has been compiled with a given feature.
833 *
834 * Returns a non-zero value if the feature exist, otherwise zero.
835 * Returns zero (0) if the feature does not exist or an unknown
836 * unknown feature is requested, non-zero otherwise.
837 */
838int
839xmlHasFeature(xmlFeature feature)
840{
841 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000842 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000843#ifdef LIBXML_THREAD_ENABLED
844 return(1);
845#else
846 return(0);
847#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000848 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000849#ifdef LIBXML_TREE_ENABLED
850 return(1);
851#else
852 return(0);
853#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000854 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000855#ifdef LIBXML_OUTPUT_ENABLED
856 return(1);
857#else
858 return(0);
859#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000860 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000861#ifdef LIBXML_PUSH_ENABLED
862 return(1);
863#else
864 return(0);
865#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000866 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000867#ifdef LIBXML_READER_ENABLED
868 return(1);
869#else
870 return(0);
871#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000872 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000873#ifdef LIBXML_PATTERN_ENABLED
874 return(1);
875#else
876 return(0);
877#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000878 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000879#ifdef LIBXML_WRITER_ENABLED
880 return(1);
881#else
882 return(0);
883#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000884 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000885#ifdef LIBXML_SAX1_ENABLED
886 return(1);
887#else
888 return(0);
889#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000890 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000891#ifdef LIBXML_FTP_ENABLED
892 return(1);
893#else
894 return(0);
895#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000896 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000897#ifdef LIBXML_HTTP_ENABLED
898 return(1);
899#else
900 return(0);
901#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000902 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000903#ifdef LIBXML_VALID_ENABLED
904 return(1);
905#else
906 return(0);
907#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000908 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000909#ifdef LIBXML_HTML_ENABLED
910 return(1);
911#else
912 return(0);
913#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000914 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000915#ifdef LIBXML_LEGACY_ENABLED
916 return(1);
917#else
918 return(0);
919#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000920 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000921#ifdef LIBXML_C14N_ENABLED
922 return(1);
923#else
924 return(0);
925#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000926 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000927#ifdef LIBXML_CATALOG_ENABLED
928 return(1);
929#else
930 return(0);
931#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000932 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000933#ifdef LIBXML_XPATH_ENABLED
934 return(1);
935#else
936 return(0);
937#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000938 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000939#ifdef LIBXML_XPTR_ENABLED
940 return(1);
941#else
942 return(0);
943#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000944 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000945#ifdef LIBXML_XINCLUDE_ENABLED
946 return(1);
947#else
948 return(0);
949#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000950 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000951#ifdef LIBXML_ICONV_ENABLED
952 return(1);
953#else
954 return(0);
955#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000956 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000957#ifdef LIBXML_ISO8859X_ENABLED
958 return(1);
959#else
960 return(0);
961#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000962 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000963#ifdef LIBXML_UNICODE_ENABLED
964 return(1);
965#else
966 return(0);
967#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000968 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000969#ifdef LIBXML_REGEXP_ENABLED
970 return(1);
971#else
972 return(0);
973#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000974 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000975#ifdef LIBXML_AUTOMATA_ENABLED
976 return(1);
977#else
978 return(0);
979#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000980 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000981#ifdef LIBXML_EXPR_ENABLED
982 return(1);
983#else
984 return(0);
985#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000986 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000987#ifdef LIBXML_SCHEMAS_ENABLED
988 return(1);
989#else
990 return(0);
991#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000992 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000993#ifdef LIBXML_SCHEMATRON_ENABLED
994 return(1);
995#else
996 return(0);
997#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000998 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000999#ifdef LIBXML_MODULES_ENABLED
1000 return(1);
1001#else
1002 return(0);
1003#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001004 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001005#ifdef LIBXML_DEBUG_ENABLED
1006 return(1);
1007#else
1008 return(0);
1009#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001010 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001011#ifdef DEBUG_MEMORY_LOCATION
1012 return(1);
1013#else
1014 return(0);
1015#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001016 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001017#ifdef LIBXML_DEBUG_RUNTIME
1018 return(1);
1019#else
1020 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001021#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001022 case XML_WITH_ZLIB:
1023#ifdef LIBXML_ZLIB_ENABLED
1024 return(1);
1025#else
1026 return(0);
1027#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001028 case XML_WITH_LZMA:
1029#ifdef LIBXML_LZMA_ENABLED
1030 return(1);
1031#else
1032 return(0);
1033#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001034 case XML_WITH_ICU:
1035#ifdef LIBXML_ICU_ENABLED
1036 return(1);
1037#else
1038 return(0);
1039#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001040 default:
1041 break;
1042 }
1043 return(0);
1044}
1045
1046/************************************************************************
1047 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001048 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001049 * *
1050 ************************************************************************/
1051
1052/**
1053 * xmlDetectSAX2:
1054 * @ctxt: an XML parser context
1055 *
1056 * Do the SAX2 detection and specific intialization
1057 */
1058static void
1059xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1060 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001061#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1063 ((ctxt->sax->startElementNs != NULL) ||
1064 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001065#else
1066 ctxt->sax2 = 1;
1067#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001068
1069 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1070 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1071 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001072 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1073 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001074 xmlErrMemory(ctxt, NULL);
1075 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001076}
1077
Daniel Veillarde57ec792003-09-10 10:50:59 +00001078typedef struct _xmlDefAttrs xmlDefAttrs;
1079typedef xmlDefAttrs *xmlDefAttrsPtr;
1080struct _xmlDefAttrs {
1081 int nbAttrs; /* number of defaulted attributes on that element */
1082 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001083 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001084};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085
1086/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001087 * xmlAttrNormalizeSpace:
1088 * @src: the source string
1089 * @dst: the target string
1090 *
1091 * Normalize the space in non CDATA attribute values:
1092 * If the attribute type is not CDATA, then the XML processor MUST further
1093 * process the normalized attribute value by discarding any leading and
1094 * trailing space (#x20) characters, and by replacing sequences of space
1095 * (#x20) characters by a single space (#x20) character.
1096 * Note that the size of dst need to be at least src, and if one doesn't need
1097 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1098 * passing src as dst is just fine.
1099 *
1100 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1101 * is needed.
1102 */
1103static xmlChar *
1104xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1105{
1106 if ((src == NULL) || (dst == NULL))
1107 return(NULL);
1108
1109 while (*src == 0x20) src++;
1110 while (*src != 0) {
1111 if (*src == 0x20) {
1112 while (*src == 0x20) src++;
1113 if (*src != 0)
1114 *dst++ = 0x20;
1115 } else {
1116 *dst++ = *src++;
1117 }
1118 }
1119 *dst = 0;
1120 if (dst == src)
1121 return(NULL);
1122 return(dst);
1123}
1124
1125/**
1126 * xmlAttrNormalizeSpace2:
1127 * @src: the source string
1128 *
1129 * Normalize the space in non CDATA attribute values, a slightly more complex
1130 * front end to avoid allocation problems when running on attribute values
1131 * coming from the input.
1132 *
1133 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1134 * is needed.
1135 */
1136static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001137xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001138{
1139 int i;
1140 int remove_head = 0;
1141 int need_realloc = 0;
1142 const xmlChar *cur;
1143
1144 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1145 return(NULL);
1146 i = *len;
1147 if (i <= 0)
1148 return(NULL);
1149
1150 cur = src;
1151 while (*cur == 0x20) {
1152 cur++;
1153 remove_head++;
1154 }
1155 while (*cur != 0) {
1156 if (*cur == 0x20) {
1157 cur++;
1158 if ((*cur == 0x20) || (*cur == 0)) {
1159 need_realloc = 1;
1160 break;
1161 }
1162 } else
1163 cur++;
1164 }
1165 if (need_realloc) {
1166 xmlChar *ret;
1167
1168 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1169 if (ret == NULL) {
1170 xmlErrMemory(ctxt, NULL);
1171 return(NULL);
1172 }
1173 xmlAttrNormalizeSpace(ret, ret);
1174 *len = (int) strlen((const char *)ret);
1175 return(ret);
1176 } else if (remove_head) {
1177 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001178 memmove(src, src + remove_head, 1 + *len);
1179 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001180 }
1181 return(NULL);
1182}
1183
1184/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 * xmlAddDefAttrs:
1186 * @ctxt: an XML parser context
1187 * @fullname: the element fullname
1188 * @fullattr: the attribute fullname
1189 * @value: the attribute value
1190 *
1191 * Add a defaulted attribute for an element
1192 */
1193static void
1194xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1195 const xmlChar *fullname,
1196 const xmlChar *fullattr,
1197 const xmlChar *value) {
1198 xmlDefAttrsPtr defaults;
1199 int len;
1200 const xmlChar *name;
1201 const xmlChar *prefix;
1202
Daniel Veillard6a31b832008-03-26 14:06:44 +00001203 /*
1204 * Allows to detect attribute redefinitions
1205 */
1206 if (ctxt->attsSpecial != NULL) {
1207 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1208 return;
1209 }
1210
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001212 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001213 if (ctxt->attsDefault == NULL)
1214 goto mem_error;
1215 }
1216
1217 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001218 * split the element name into prefix:localname , the string found
1219 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001220 */
1221 name = xmlSplitQName3(fullname, &len);
1222 if (name == NULL) {
1223 name = xmlDictLookup(ctxt->dict, fullname, -1);
1224 prefix = NULL;
1225 } else {
1226 name = xmlDictLookup(ctxt->dict, name, -1);
1227 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1228 }
1229
1230 /*
1231 * make sure there is some storage
1232 */
1233 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1234 if (defaults == NULL) {
1235 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001236 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001237 if (defaults == NULL)
1238 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001239 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001240 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001241 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1242 defaults, NULL) < 0) {
1243 xmlFree(defaults);
1244 goto mem_error;
1245 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001246 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001247 xmlDefAttrsPtr temp;
1248
1249 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001250 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001251 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001252 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001253 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001254 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001255 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1256 defaults, NULL) < 0) {
1257 xmlFree(defaults);
1258 goto mem_error;
1259 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001260 }
1261
1262 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001263 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 * are within the DTD and hen not associated to namespace names.
1265 */
1266 name = xmlSplitQName3(fullattr, &len);
1267 if (name == NULL) {
1268 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1269 prefix = NULL;
1270 } else {
1271 name = xmlDictLookup(ctxt->dict, name, -1);
1272 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1273 }
1274
Daniel Veillardae0765b2008-07-31 19:54:59 +00001275 defaults->values[5 * defaults->nbAttrs] = name;
1276 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001277 /* intern the string and precompute the end */
1278 len = xmlStrlen(value);
1279 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001280 defaults->values[5 * defaults->nbAttrs + 2] = value;
1281 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1282 if (ctxt->external)
1283 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1284 else
1285 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001286 defaults->nbAttrs++;
1287
1288 return;
1289
1290mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001291 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001292 return;
1293}
1294
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001295/**
1296 * xmlAddSpecialAttr:
1297 * @ctxt: an XML parser context
1298 * @fullname: the element fullname
1299 * @fullattr: the attribute fullname
1300 * @type: the attribute type
1301 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001302 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001303 */
1304static void
1305xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1306 const xmlChar *fullname,
1307 const xmlChar *fullattr,
1308 int type)
1309{
1310 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001311 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001312 if (ctxt->attsSpecial == NULL)
1313 goto mem_error;
1314 }
1315
Daniel Veillardac4118d2008-01-11 05:27:32 +00001316 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1317 return;
1318
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001319 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1320 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001321 return;
1322
1323mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001324 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001325 return;
1326}
1327
Daniel Veillard4432df22003-09-28 18:58:27 +00001328/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001329 * xmlCleanSpecialAttrCallback:
1330 *
1331 * Removes CDATA attributes from the special attribute table
1332 */
1333static void
1334xmlCleanSpecialAttrCallback(void *payload, void *data,
1335 const xmlChar *fullname, const xmlChar *fullattr,
1336 const xmlChar *unused ATTRIBUTE_UNUSED) {
1337 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1338
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001339 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001340 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1341 }
1342}
1343
1344/**
1345 * xmlCleanSpecialAttr:
1346 * @ctxt: an XML parser context
1347 *
1348 * Trim the list of attributes defined to remove all those of type
1349 * CDATA as they are not special. This call should be done when finishing
1350 * to parse the DTD and before starting to parse the document root.
1351 */
1352static void
1353xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1354{
1355 if (ctxt->attsSpecial == NULL)
1356 return;
1357
1358 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1359
1360 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1361 xmlHashFree(ctxt->attsSpecial, NULL);
1362 ctxt->attsSpecial = NULL;
1363 }
1364 return;
1365}
1366
1367/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001368 * xmlCheckLanguageID:
1369 * @lang: pointer to the string value
1370 *
1371 * Checks that the value conforms to the LanguageID production:
1372 *
1373 * NOTE: this is somewhat deprecated, those productions were removed from
1374 * the XML Second edition.
1375 *
1376 * [33] LanguageID ::= Langcode ('-' Subcode)*
1377 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1378 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1379 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1380 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1381 * [38] Subcode ::= ([a-z] | [A-Z])+
1382 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1384 *
1385 * http://www.rfc-editor.org/rfc/rfc5646.txt
1386 * langtag = language
1387 * ["-" script]
1388 * ["-" region]
1389 * *("-" variant)
1390 * *("-" extension)
1391 * ["-" privateuse]
1392 * language = 2*3ALPHA ; shortest ISO 639 code
1393 * ["-" extlang] ; sometimes followed by
1394 * ; extended language subtags
1395 * / 4ALPHA ; or reserved for future use
1396 * / 5*8ALPHA ; or registered language subtag
1397 *
1398 * extlang = 3ALPHA ; selected ISO 639 codes
1399 * *2("-" 3ALPHA) ; permanently reserved
1400 *
1401 * script = 4ALPHA ; ISO 15924 code
1402 *
1403 * region = 2ALPHA ; ISO 3166-1 code
1404 * / 3DIGIT ; UN M.49 code
1405 *
1406 * variant = 5*8alphanum ; registered variants
1407 * / (DIGIT 3alphanum)
1408 *
1409 * extension = singleton 1*("-" (2*8alphanum))
1410 *
1411 * ; Single alphanumerics
1412 * ; "x" reserved for private use
1413 * singleton = DIGIT ; 0 - 9
1414 * / %x41-57 ; A - W
1415 * / %x59-5A ; Y - Z
1416 * / %x61-77 ; a - w
1417 * / %x79-7A ; y - z
1418 *
1419 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1420 * The parser below doesn't try to cope with extension or privateuse
1421 * that could be added but that's not interoperable anyway
1422 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001423 * Returns 1 if correct 0 otherwise
1424 **/
1425int
1426xmlCheckLanguageID(const xmlChar * lang)
1427{
Daniel Veillard60587d62010-11-04 15:16:27 +01001428 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001429
1430 if (cur == NULL)
1431 return (0);
1432 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001433 ((cur[0] == 'I') && (cur[1] == '-')) ||
1434 ((cur[0] == 'x') && (cur[1] == '-')) ||
1435 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001436 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001437 * Still allow IANA code and user code which were coming
1438 * from the previous version of the XML-1.0 specification
1439 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001440 */
1441 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001442 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001443 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1444 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001445 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001446 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001447 nxt = cur;
1448 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1449 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1450 nxt++;
1451 if (nxt - cur >= 4) {
1452 /*
1453 * Reserved
1454 */
1455 if ((nxt - cur > 8) || (nxt[0] != 0))
1456 return(0);
1457 return(1);
1458 }
1459 if (nxt - cur < 2)
1460 return(0);
1461 /* we got an ISO 639 code */
1462 if (nxt[0] == 0)
1463 return(1);
1464 if (nxt[0] != '-')
1465 return(0);
1466
1467 nxt++;
1468 cur = nxt;
1469 /* now we can have extlang or script or region or variant */
1470 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1471 goto region_m49;
1472
1473 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1474 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1475 nxt++;
1476 if (nxt - cur == 4)
1477 goto script;
1478 if (nxt - cur == 2)
1479 goto region;
1480 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1481 goto variant;
1482 if (nxt - cur != 3)
1483 return(0);
1484 /* we parsed an extlang */
1485 if (nxt[0] == 0)
1486 return(1);
1487 if (nxt[0] != '-')
1488 return(0);
1489
1490 nxt++;
1491 cur = nxt;
1492 /* now we can have script or region or variant */
1493 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1494 goto region_m49;
1495
1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 nxt++;
1499 if (nxt - cur == 2)
1500 goto region;
1501 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502 goto variant;
1503 if (nxt - cur != 4)
1504 return(0);
1505 /* we parsed a script */
1506script:
1507 if (nxt[0] == 0)
1508 return(1);
1509 if (nxt[0] != '-')
1510 return(0);
1511
1512 nxt++;
1513 cur = nxt;
1514 /* now we can have region or variant */
1515 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1516 goto region_m49;
1517
1518 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1519 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1520 nxt++;
1521
1522 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523 goto variant;
1524 if (nxt - cur != 2)
1525 return(0);
1526 /* we parsed a region */
1527region:
1528 if (nxt[0] == 0)
1529 return(1);
1530 if (nxt[0] != '-')
1531 return(0);
1532
1533 nxt++;
1534 cur = nxt;
1535 /* now we can just have a variant */
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538 nxt++;
1539
1540 if ((nxt - cur < 5) || (nxt - cur > 8))
1541 return(0);
1542
1543 /* we parsed a variant */
1544variant:
1545 if (nxt[0] == 0)
1546 return(1);
1547 if (nxt[0] != '-')
1548 return(0);
1549 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001550 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001551
1552region_m49:
1553 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1554 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1555 nxt += 3;
1556 goto region;
1557 }
1558 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001559}
1560
Owen Taylor3473f882001-02-23 17:55:21 +00001561/************************************************************************
1562 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001563 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001564 * *
1565 ************************************************************************/
1566
Daniel Veillard8ed10722009-08-20 19:17:36 +02001567static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1568 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001569
Daniel Veillard0fb18932003-09-07 09:14:37 +00001570#ifdef SAX2
1571/**
1572 * nsPush:
1573 * @ctxt: an XML parser context
1574 * @prefix: the namespace prefix or NULL
1575 * @URL: the namespace name
1576 *
1577 * Pushes a new parser namespace on top of the ns stack
1578 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001579 * Returns -1 in case of error, -2 if the namespace should be discarded
1580 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 */
1582static int
1583nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1584{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001585 if (ctxt->options & XML_PARSE_NSCLEAN) {
1586 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001587 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001588 if (ctxt->nsTab[i] == prefix) {
1589 /* in scope */
1590 if (ctxt->nsTab[i + 1] == URL)
1591 return(-2);
1592 /* out of scope keep it */
1593 break;
1594 }
1595 }
1596 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001597 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1598 ctxt->nsMax = 10;
1599 ctxt->nsNr = 0;
1600 ctxt->nsTab = (const xmlChar **)
1601 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1602 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001603 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001604 ctxt->nsMax = 0;
1605 return (-1);
1606 }
1607 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001608 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001609 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001610 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1611 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1612 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001613 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001614 ctxt->nsMax /= 2;
1615 return (-1);
1616 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001617 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001618 }
1619 ctxt->nsTab[ctxt->nsNr++] = prefix;
1620 ctxt->nsTab[ctxt->nsNr++] = URL;
1621 return (ctxt->nsNr);
1622}
1623/**
1624 * nsPop:
1625 * @ctxt: an XML parser context
1626 * @nr: the number to pop
1627 *
1628 * Pops the top @nr parser prefix/namespace from the ns stack
1629 *
1630 * Returns the number of namespaces removed
1631 */
1632static int
1633nsPop(xmlParserCtxtPtr ctxt, int nr)
1634{
1635 int i;
1636
1637 if (ctxt->nsTab == NULL) return(0);
1638 if (ctxt->nsNr < nr) {
1639 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1640 nr = ctxt->nsNr;
1641 }
1642 if (ctxt->nsNr <= 0)
1643 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001644
Daniel Veillard0fb18932003-09-07 09:14:37 +00001645 for (i = 0;i < nr;i++) {
1646 ctxt->nsNr--;
1647 ctxt->nsTab[ctxt->nsNr] = NULL;
1648 }
1649 return(nr);
1650}
1651#endif
1652
1653static int
1654xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1655 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657 int maxatts;
1658
1659 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001660 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001661 atts = (const xmlChar **)
1662 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001663 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001664 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001665 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1666 if (attallocs == NULL) goto mem_error;
1667 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001668 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001669 } else if (nr + 5 > ctxt->maxatts) {
1670 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001671 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1672 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001673 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001674 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001675 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1676 (maxatts / 5) * sizeof(int));
1677 if (attallocs == NULL) goto mem_error;
1678 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001679 ctxt->maxatts = maxatts;
1680 }
1681 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001682mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001683 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001684 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001685}
1686
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001687/**
1688 * inputPush:
1689 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001690 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691 *
1692 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001693 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001694 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001695 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001696int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1698{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001699 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001700 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001701 if (ctxt->inputNr >= ctxt->inputMax) {
1702 ctxt->inputMax *= 2;
1703 ctxt->inputTab =
1704 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1705 ctxt->inputMax *
1706 sizeof(ctxt->inputTab[0]));
1707 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001709 xmlFreeInputStream(value);
1710 ctxt->inputMax /= 2;
1711 value = NULL;
1712 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001713 }
1714 }
1715 ctxt->inputTab[ctxt->inputNr] = value;
1716 ctxt->input = value;
1717 return (ctxt->inputNr++);
1718}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001719/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001720 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 * @ctxt: an XML parser context
1722 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001723 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001724 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001725 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001726 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001727xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001728inputPop(xmlParserCtxtPtr ctxt)
1729{
1730 xmlParserInputPtr ret;
1731
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001732 if (ctxt == NULL)
1733 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001734 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001735 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001736 ctxt->inputNr--;
1737 if (ctxt->inputNr > 0)
1738 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1739 else
1740 ctxt->input = NULL;
1741 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001742 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001743 return (ret);
1744}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001745/**
1746 * nodePush:
1747 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001748 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001749 *
1750 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001751 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001752 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001753 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001754int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001755nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1756{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001757 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001758 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001759 xmlNodePtr *tmp;
1760
1761 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1762 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001763 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001764 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001765 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001766 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001767 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001768 ctxt->nodeTab = tmp;
1769 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001771 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1772 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001773 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001774 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001775 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001776 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001777 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001778 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 ctxt->nodeTab[ctxt->nodeNr] = value;
1780 ctxt->node = value;
1781 return (ctxt->nodeNr++);
1782}
Daniel Veillard8915c152008-08-26 13:05:34 +00001783
Daniel Veillard1c732d22002-11-30 11:22:59 +00001784/**
1785 * nodePop:
1786 * @ctxt: an XML parser context
1787 *
1788 * Pops the top element node from the node stack
1789 *
1790 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001791 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001792xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001793nodePop(xmlParserCtxtPtr ctxt)
1794{
1795 xmlNodePtr ret;
1796
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001797 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001798 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001799 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001800 ctxt->nodeNr--;
1801 if (ctxt->nodeNr > 0)
1802 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1803 else
1804 ctxt->node = NULL;
1805 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001806 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001807 return (ret);
1808}
Daniel Veillarda2351322004-06-27 12:08:10 +00001809
1810#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001812 * nameNsPush:
1813 * @ctxt: an XML parser context
1814 * @value: the element name
1815 * @prefix: the element prefix
1816 * @URI: the element namespace name
1817 *
1818 * Pushes a new element name/prefix/URL on top of the name stack
1819 *
1820 * Returns -1 in case of error, the index in the stack otherwise
1821 */
1822static int
1823nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1824 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1825{
1826 if (ctxt->nameNr >= ctxt->nameMax) {
1827 const xmlChar * *tmp;
1828 void **tmp2;
1829 ctxt->nameMax *= 2;
1830 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1831 ctxt->nameMax *
1832 sizeof(ctxt->nameTab[0]));
1833 if (tmp == NULL) {
1834 ctxt->nameMax /= 2;
1835 goto mem_error;
1836 }
1837 ctxt->nameTab = tmp;
1838 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1839 ctxt->nameMax * 3 *
1840 sizeof(ctxt->pushTab[0]));
1841 if (tmp2 == NULL) {
1842 ctxt->nameMax /= 2;
1843 goto mem_error;
1844 }
1845 ctxt->pushTab = tmp2;
1846 }
1847 ctxt->nameTab[ctxt->nameNr] = value;
1848 ctxt->name = value;
1849 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1850 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001851 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001852 return (ctxt->nameNr++);
1853mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001854 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001855 return (-1);
1856}
1857/**
1858 * nameNsPop:
1859 * @ctxt: an XML parser context
1860 *
1861 * Pops the top element/prefix/URI name from the name stack
1862 *
1863 * Returns the name just removed
1864 */
1865static const xmlChar *
1866nameNsPop(xmlParserCtxtPtr ctxt)
1867{
1868 const xmlChar *ret;
1869
1870 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001871 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001872 ctxt->nameNr--;
1873 if (ctxt->nameNr > 0)
1874 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1875 else
1876 ctxt->name = NULL;
1877 ret = ctxt->nameTab[ctxt->nameNr];
1878 ctxt->nameTab[ctxt->nameNr] = NULL;
1879 return (ret);
1880}
Daniel Veillarda2351322004-06-27 12:08:10 +00001881#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001882
1883/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001884 * namePush:
1885 * @ctxt: an XML parser context
1886 * @value: the element name
1887 *
1888 * Pushes a new element name on top of the name stack
1889 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001890 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001891 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001892int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001893namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001894{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001895 if (ctxt == NULL) return (-1);
1896
Daniel Veillard1c732d22002-11-30 11:22:59 +00001897 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001898 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001899 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001900 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001901 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001902 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001903 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001904 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001905 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001906 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001907 }
1908 ctxt->nameTab[ctxt->nameNr] = value;
1909 ctxt->name = value;
1910 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001911mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001913 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001914}
1915/**
1916 * namePop:
1917 * @ctxt: an XML parser context
1918 *
1919 * Pops the top element name from the name stack
1920 *
1921 * Returns the name just removed
1922 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001923const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001924namePop(xmlParserCtxtPtr ctxt)
1925{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001926 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001927
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001928 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1929 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001930 ctxt->nameNr--;
1931 if (ctxt->nameNr > 0)
1932 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1933 else
1934 ctxt->name = NULL;
1935 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001936 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001937 return (ret);
1938}
Owen Taylor3473f882001-02-23 17:55:21 +00001939
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001940static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001941 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001942 int *tmp;
1943
Owen Taylor3473f882001-02-23 17:55:21 +00001944 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001945 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1946 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1947 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001948 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001949 ctxt->spaceMax /=2;
1950 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001951 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001952 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001953 }
1954 ctxt->spaceTab[ctxt->spaceNr] = val;
1955 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1956 return(ctxt->spaceNr++);
1957}
1958
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001959static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001960 int ret;
1961 if (ctxt->spaceNr <= 0) return(0);
1962 ctxt->spaceNr--;
1963 if (ctxt->spaceNr > 0)
1964 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1965 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001966 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001967 ret = ctxt->spaceTab[ctxt->spaceNr];
1968 ctxt->spaceTab[ctxt->spaceNr] = -1;
1969 return(ret);
1970}
1971
1972/*
1973 * Macros for accessing the content. Those should be used only by the parser,
1974 * and not exported.
1975 *
1976 * Dirty macros, i.e. one often need to make assumption on the context to
1977 * use them
1978 *
1979 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1980 * To be used with extreme caution since operations consuming
1981 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001994 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1995 *
1996 * NEXT Skip to the next character, this does the proper decoding
1997 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001998 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001999 * CUR_CHAR(l) returns the current unicode character (int), set l
2000 * to the number of xmlChars used for the encoding [0-5].
2001 * CUR_SCHAR same but operate on a string instead of the context
2002 * COPY_BUF copy the current unicode char to the target buffer, increment
2003 * the index
2004 * GROW, SHRINK handling of input buffers
2005 */
2006
/*
 * Raw accessors on the current input of the parser context. They all
 * expand to expressions on a variable named ctxt that must be in scope
 * at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at @s are c1 ... cn.
 * The comparison stops at the first mismatch, so bytes after it are not
 * read. Every argument is parenthesized in the expansion so that any
 * expression can be passed, e.g. a pointer sum for @s or a conditional
 * expression for one of the characters.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2029
/*
 * SKIP(val): moves the input cursor @val xmlChars forward and adds @val
 * to the column and to ctxt->nbChars, without looking for newlines.
 * Then, as SKIPL does, hands over to xmlParserHandlePEReference() when
 * the cursor lands on '%', and pops the input when it lands on a 0 byte
 * and the buffer cannot be grown.
 * @val is evaluated several times and must be free of side effects.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that line
 * and column stay correct when newlines are skipped.
 * @val is parenthesized in the loop test so that any expression can be
 * passed; it is evaluated on every iteration and must be free of side
 * effects.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
2052
Daniel Veillarda880b122003-04-21 21:36:41 +00002053#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002054 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2055 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002056 xmlSHRINK (ctxt);
2057
2058static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2059 xmlParserInputShrink(ctxt->input);
2060 if ((*ctxt->input->cur == 0) &&
2061 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2062 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002063 }
Owen Taylor3473f882001-02-23 17:55:21 +00002064
Daniel Veillarda880b122003-04-21 21:36:41 +00002065#define GROW if ((ctxt->progressive == 0) && \
2066 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002067 xmlGROW (ctxt);
2068
2069static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002070 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2071 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2072
2073 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2074 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002075 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002076 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2077 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002078 xmlHaltParser(ctxt);
2079 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002080 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002081 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002082 if ((ctxt->input->cur > ctxt->input->end) ||
2083 (ctxt->input->cur < ctxt->input->base)) {
2084 xmlHaltParser(ctxt);
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2086 return;
2087 }
Daniel Veillard59df7832010-02-02 10:24:01 +01002088 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002089 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2090 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002091}
Owen Taylor3473f882001-02-23 17:55:21 +00002092
/*
 * Stepping and copying helpers. They all expand to code using a variable
 * named ctxt that must be in scope at the point of use. Macro arguments
 * are parenthesized in the expansions so that any expression can be
 * passed; @l and @i must still designate variables since they are
 * assigned to or have their address taken.
 */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: steps over one xmlChar without looking for a newline, then
 * tries to read more data if the cursor lands on a 0 byte.
 * Expands to a brace block, not to a single statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): steps over the current character, @l xmlChars long, updating
 * line and column, and hands over to xmlParserHandlePEReference() when
 * the cursor lands on '%'.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): appends the character @v to buffer @b at index @i
 * and advances @i: a single byte when @l is 1, otherwise whatever
 * xmlCopyCharMultiByte() writes.
 * Expands to an unbraced if/else; do not use it as the body of another
 * "if" that has its own "else".
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002119
2120/**
2121 * xmlSkipBlankChars:
2122 * @ctxt: the XML parser context
2123 *
2124 * skip all blanks character found at that point in the input streams.
2125 * It pops up finished entities in the process if allowable at that point.
2126 *
2127 * Returns the number of space chars skipped
2128 */
2129
2130int
2131xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002132 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002133
2134 /*
2135 * It's Okay to use CUR/NEXT here since all the blanks are on
2136 * the ASCII range.
2137 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002138 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2139 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002140 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002141 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002142 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002143 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002144 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002145 if (*cur == '\n') {
2146 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002147 } else {
2148 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002149 }
2150 cur++;
2151 res++;
2152 if (*cur == 0) {
2153 ctxt->input->cur = cur;
2154 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2155 cur = ctxt->input->cur;
2156 }
2157 }
2158 ctxt->input->cur = cur;
2159 } else {
2160 int cur;
2161 do {
2162 cur = CUR;
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002163 while ((IS_BLANK_CH(cur) && /* CHECKED tstblanks.xml */
2164 (ctxt->instate != XML_PARSER_EOF))) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002165 NEXT;
2166 cur = CUR;
2167 res++;
2168 }
2169 while ((cur == 0) && (ctxt->inputNr > 1) &&
2170 (ctxt->instate != XML_PARSER_COMMENT)) {
2171 xmlPopInput(ctxt);
2172 cur = CUR;
2173 }
2174 /*
2175 * Need to handle support of entities branching here
2176 */
2177 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
Daniel Veillard3bd6ae12015-11-20 15:06:02 +08002178 } while ((IS_BLANK(cur)) && /* CHECKED tstblanks.xml */
2179 (ctxt->instate != XML_PARSER_EOF));
Daniel Veillard02141ea2001-04-30 11:46:40 +00002180 }
Owen Taylor3473f882001-02-23 17:55:21 +00002181 return(res);
2182}
2183
2184/************************************************************************
2185 * *
2186 * Commodity functions to handle entities *
2187 * *
2188 ************************************************************************/
2189
2190/**
2191 * xmlPopInput:
2192 * @ctxt: an XML parser context
2193 *
2194 * xmlPopInput: the current input pointed by ctxt->input came to an end
2195 * pop it and return the next char.
2196 *
2197 * Returns the current xmlChar in the parser context
2198 */
2199xmlChar
2200xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002201 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002202 if (xmlParserDebugEntities)
2203 xmlGenericError(xmlGenericErrorContext,
2204 "Popping input %d\n", ctxt->inputNr);
2205 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002206 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002207 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2208 return(xmlPopInput(ctxt));
2209 return(CUR);
2210}
2211
2212/**
2213 * xmlPushInput:
2214 * @ctxt: an XML parser context
2215 * @input: an XML parser input fragment (entity, XML fragment ...).
2216 *
2217 * xmlPushInput: switch to a new input stream which is stacked on top
2218 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002219 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002220 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002221int
Owen Taylor3473f882001-02-23 17:55:21 +00002222xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002223 int ret;
2224 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002225
2226 if (xmlParserDebugEntities) {
2227 if ((ctxt->input != NULL) && (ctxt->input->filename))
2228 xmlGenericError(xmlGenericErrorContext,
2229 "%s(%d): ", ctxt->input->filename,
2230 ctxt->input->line);
2231 xmlGenericError(xmlGenericErrorContext,
2232 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2233 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002234 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002235 if (ctxt->instate == XML_PARSER_EOF)
2236 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002237 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002238 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002239}
2240
2241/**
2242 * xmlParseCharRef:
2243 * @ctxt: an XML parser context
2244 *
2245 * parse Reference declarations
2246 *
2247 * [66] CharRef ::= '&#' [0-9]+ ';' |
2248 * '&#x' [0-9a-fA-F]+ ';'
2249 *
2250 * [ WFC: Legal Character ]
2251 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002252 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002253 *
2254 * Returns the value parsed (as an int), 0 in case of error
2255 */
2256int
2257xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002258 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002259 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002260 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002261
Owen Taylor3473f882001-02-23 17:55:21 +00002262 /*
2263 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2264 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002265 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002266 (NXT(2) == 'x')) {
2267 SKIP(3);
2268 GROW;
2269 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002270 if (count++ > 20) {
2271 count = 0;
2272 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002273 if (ctxt->instate == XML_PARSER_EOF)
2274 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002275 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002276 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002277 val = val * 16 + (CUR - '0');
2278 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2279 val = val * 16 + (CUR - 'a') + 10;
2280 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2281 val = val * 16 + (CUR - 'A') + 10;
2282 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002283 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002284 val = 0;
2285 break;
2286 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002287 if (val > 0x10FFFF)
2288 outofrange = val;
2289
Owen Taylor3473f882001-02-23 17:55:21 +00002290 NEXT;
2291 count++;
2292 }
2293 if (RAW == ';') {
2294 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002295 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002296 ctxt->nbChars ++;
2297 ctxt->input->cur++;
2298 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002299 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002300 SKIP(2);
2301 GROW;
2302 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002303 if (count++ > 20) {
2304 count = 0;
2305 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002306 if (ctxt->instate == XML_PARSER_EOF)
2307 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002308 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002309 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002310 val = val * 10 + (CUR - '0');
2311 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002312 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002313 val = 0;
2314 break;
2315 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002316 if (val > 0x10FFFF)
2317 outofrange = val;
2318
Owen Taylor3473f882001-02-23 17:55:21 +00002319 NEXT;
2320 count++;
2321 }
2322 if (RAW == ';') {
2323 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002324 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002325 ctxt->nbChars ++;
2326 ctxt->input->cur++;
2327 }
2328 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002329 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002330 }
2331
2332 /*
2333 * [ WFC: Legal Character ]
2334 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002335 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002336 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002337 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002338 return(val);
2339 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002340 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2341 "xmlParseCharRef: invalid xmlChar value %d\n",
2342 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002343 }
2344 return(0);
2345}
2346
2347/**
2348 * xmlParseStringCharRef:
2349 * @ctxt: an XML parser context
2350 * @str: a pointer to an index in the string
2351 *
2352 * parse Reference declarations, variant parsing from a string rather
2353 * than an an input flow.
2354 *
2355 * [66] CharRef ::= '&#' [0-9]+ ';' |
2356 * '&#x' [0-9a-fA-F]+ ';'
2357 *
2358 * [ WFC: Legal Character ]
2359 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002360 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002361 *
2362 * Returns the value parsed (as an int), 0 in case of error, str will be
2363 * updated to the current value of the index
2364 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002365static int
Owen Taylor3473f882001-02-23 17:55:21 +00002366xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2367 const xmlChar *ptr;
2368 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002369 unsigned int val = 0;
2370 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002371
2372 if ((str == NULL) || (*str == NULL)) return(0);
2373 ptr = *str;
2374 cur = *ptr;
2375 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2376 ptr += 3;
2377 cur = *ptr;
2378 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002379 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002380 val = val * 16 + (cur - '0');
2381 else if ((cur >= 'a') && (cur <= 'f'))
2382 val = val * 16 + (cur - 'a') + 10;
2383 else if ((cur >= 'A') && (cur <= 'F'))
2384 val = val * 16 + (cur - 'A') + 10;
2385 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002386 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002387 val = 0;
2388 break;
2389 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002390 if (val > 0x10FFFF)
2391 outofrange = val;
2392
Owen Taylor3473f882001-02-23 17:55:21 +00002393 ptr++;
2394 cur = *ptr;
2395 }
2396 if (cur == ';')
2397 ptr++;
2398 } else if ((cur == '&') && (ptr[1] == '#')){
2399 ptr += 2;
2400 cur = *ptr;
2401 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002402 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002403 val = val * 10 + (cur - '0');
2404 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002405 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002406 val = 0;
2407 break;
2408 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002409 if (val > 0x10FFFF)
2410 outofrange = val;
2411
Owen Taylor3473f882001-02-23 17:55:21 +00002412 ptr++;
2413 cur = *ptr;
2414 }
2415 if (cur == ';')
2416 ptr++;
2417 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002418 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002419 return(0);
2420 }
2421 *str = ptr;
2422
2423 /*
2424 * [ WFC: Legal Character ]
2425 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002426 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002427 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002428 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002429 return(val);
2430 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002431 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2432 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2433 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002434 }
2435 return(0);
2436}
2437
2438/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002439 * xmlNewBlanksWrapperInputStream:
2440 * @ctxt: an XML parser context
2441 * @entity: an Entity pointer
2442 *
2443 * Create a new input stream for wrapping
2444 * blanks around a PEReference
2445 *
2446 * Returns the new input stream or NULL
2447 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002448
Daniel Veillardf5582f12002-06-11 10:08:16 +00002449static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002450
Daniel Veillardf4862f02002-09-10 11:13:43 +00002451static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002452xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2453 xmlParserInputPtr input;
2454 xmlChar *buffer;
2455 size_t length;
2456 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002457 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2458 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002459 return(NULL);
2460 }
2461 if (xmlParserDebugEntities)
2462 xmlGenericError(xmlGenericErrorContext,
2463 "new blanks wrapper for entity: %s\n", entity->name);
2464 input = xmlNewInputStream(ctxt);
2465 if (input == NULL) {
2466 return(NULL);
2467 }
2468 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002469 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002470 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002471 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002472 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002473 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002474 }
2475 buffer [0] = ' ';
2476 buffer [1] = '%';
2477 buffer [length-3] = ';';
2478 buffer [length-2] = ' ';
2479 buffer [length-1] = 0;
2480 memcpy(buffer + 2, entity->name, length - 5);
2481 input->free = deallocblankswrapper;
2482 input->base = buffer;
2483 input->cur = buffer;
2484 input->length = length;
2485 input->end = &buffer[length];
2486 return(input);
2487}
2488
2489/**
Owen Taylor3473f882001-02-23 17:55:21 +00002490 * xmlParserHandlePEReference:
2491 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002492 *
Owen Taylor3473f882001-02-23 17:55:21 +00002493 * [69] PEReference ::= '%' Name ';'
2494 *
2495 * [ WFC: No Recursion ]
2496 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002497 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002498 *
2499 * [ WFC: Entity Declared ]
2500 * In a document without any DTD, a document with only an internal DTD
2501 * subset which contains no parameter entity references, or a document
2502 * with "standalone='yes'", ... ... The declaration of a parameter
2503 * entity must precede any reference to it...
2504 *
2505 * [ VC: Entity Declared ]
2506 * In a document with an external subset or external parameter entities
2507 * with "standalone='no'", ... ... The declaration of a parameter entity
2508 * must precede any reference to it...
2509 *
2510 * [ WFC: In DTD ]
2511 * Parameter-entity references may only appear in the DTD.
2512 * NOTE: misleading but this is handled.
2513 *
2514 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002515 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002516 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002517 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002518 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002519 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002520 */
2521void
2522xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002523 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002524 xmlEntityPtr entity = NULL;
2525 xmlParserInputPtr input;
2526
Owen Taylor3473f882001-02-23 17:55:21 +00002527 if (RAW != '%') return;
2528 switch(ctxt->instate) {
2529 case XML_PARSER_CDATA_SECTION:
2530 return;
2531 case XML_PARSER_COMMENT:
2532 return;
2533 case XML_PARSER_START_TAG:
2534 return;
2535 case XML_PARSER_END_TAG:
2536 return;
2537 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002538 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002539 return;
2540 case XML_PARSER_PROLOG:
2541 case XML_PARSER_START:
2542 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002543 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002544 return;
2545 case XML_PARSER_ENTITY_DECL:
2546 case XML_PARSER_CONTENT:
2547 case XML_PARSER_ATTRIBUTE_VALUE:
2548 case XML_PARSER_PI:
2549 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002550 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002551 /* we just ignore it there */
2552 return;
2553 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002554 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002555 return;
2556 case XML_PARSER_ENTITY_VALUE:
2557 /*
2558 * NOTE: in the case of entity values, we don't do the
2559 * substitution here since we need the literal
2560 * entity value to be able to save the internal
2561 * subset of the document.
2562 * This will be handled by xmlStringDecodeEntities
2563 */
2564 return;
2565 case XML_PARSER_DTD:
2566 /*
2567 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2568 * In the internal DTD subset, parameter-entity references
2569 * can occur only where markup declarations can occur, not
2570 * within markup declarations.
2571 * In that case this is handled in xmlParseMarkupDecl
2572 */
2573 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2574 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002575 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002576 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002577 break;
2578 case XML_PARSER_IGNORE:
2579 return;
2580 }
2581
2582 NEXT;
2583 name = xmlParseName(ctxt);
2584 if (xmlParserDebugEntities)
2585 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002586 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002587 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002588 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002589 } else {
2590 if (RAW == ';') {
2591 NEXT;
2592 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2593 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002594 if (ctxt->instate == XML_PARSER_EOF)
2595 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002596 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002597
Owen Taylor3473f882001-02-23 17:55:21 +00002598 /*
2599 * [ WFC: Entity Declared ]
2600 * In a document without any DTD, a document with only an
2601 * internal DTD subset which contains no parameter entity
2602 * references, or a document with "standalone='yes'", ...
2603 * ... The declaration of a parameter entity must precede
2604 * any reference to it...
2605 */
2606 if ((ctxt->standalone == 1) ||
2607 ((ctxt->hasExternalSubset == 0) &&
2608 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002609 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002610 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002611 } else {
2612 /*
2613 * [ VC: Entity Declared ]
2614 * In a document with an external subset or external
2615 * parameter entities with "standalone='no'", ...
2616 * ... The declaration of a parameter entity must precede
2617 * any reference to it...
2618 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002619 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2620 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2621 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002622 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002623 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002624 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2625 "PEReference: %%%s; not found\n",
2626 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002627 ctxt->valid = 0;
2628 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002629 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002630 } else if (ctxt->input->free != deallocblankswrapper) {
2631 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002632 if (xmlPushInput(ctxt, input) < 0)
2633 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002634 } else {
2635 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2636 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002637 xmlChar start[4];
2638 xmlCharEncoding enc;
2639
Owen Taylor3473f882001-02-23 17:55:21 +00002640 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002641 * Note: external parameter entities will not be loaded, it
2642 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002643 * option of validating, or substituting entities were
2644 * given. Doing so is far more secure as the parser will
2645 * only process data coming from the document entity by
2646 * default.
2647 */
2648 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2649 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2650 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002651 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2652 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2653 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002654 (ctxt->validate == 0))
2655 return;
2656
2657 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002658 * handle the extra spaces added before and after
2659 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002660 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002661 */
2662 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002663 if (xmlPushInput(ctxt, input) < 0)
2664 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002665
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002666 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002667 * Get the 4 first bytes and decode the charset
2668 * if enc != XML_CHAR_ENCODING_NONE
2669 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002670 * Note that, since we may have some non-UTF8
2671 * encoding (like UTF16, bug 135229), the 'length'
2672 * is not known, but we can calculate based upon
2673 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002674 */
2675 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002676 if (ctxt->instate == XML_PARSER_EOF)
2677 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002678 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002679 start[0] = RAW;
2680 start[1] = NXT(1);
2681 start[2] = NXT(2);
2682 start[3] = NXT(3);
2683 enc = xmlDetectCharEncoding(start, 4);
2684 if (enc != XML_CHAR_ENCODING_NONE) {
2685 xmlSwitchEncoding(ctxt, enc);
2686 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002687 }
2688
Owen Taylor3473f882001-02-23 17:55:21 +00002689 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002690 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2691 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002692 xmlParseTextDecl(ctxt);
2693 }
Owen Taylor3473f882001-02-23 17:55:21 +00002694 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002695 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2696 "PEReference: %s is not a parameter entity\n",
2697 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002698 }
2699 }
2700 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002701 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002702 }
Owen Taylor3473f882001-02-23 17:55:21 +00002703 }
2704}
2705
/*
 * growBuffer(buffer, n):
 * Macro used to grow the current buffer to twice its size plus n bytes.
 *
 * buffer##_size is expected to be a size_t holding the current size; it
 * is updated together with buffer on success.
 * mem_error: is a label the enclosing function must provide; it is
 * jumped to when the new size wraps around or the reallocation fails.
 * In both cases buffer and buffer##_size are left untouched.
 *
 * n is parenthesized so that any expression can be passed. The plain
 * brace form is kept on purpose so that existing call sites, with or
 * without a trailing semicolon, keep compiling.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2720
2721/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002722 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002723 * @ctxt: the parser context
2724 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002725 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002726 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2727 * @end: an end marker xmlChar, 0 if none
2728 * @end2: an end marker xmlChar, 0 if none
2729 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002730 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002731 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002732 *
2733 * [67] Reference ::= EntityRef | CharRef
2734 *
2735 * [69] PEReference ::= '%' Name ';'
2736 *
2737 * Returns A newly allocated string with the substitution done. The caller
2738 * must deallocate it !
2739 */
2740xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002741xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2742 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002743 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002744 size_t buffer_size = 0;
2745 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002746
2747 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002748 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002749 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002750 xmlEntityPtr ent;
2751 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002752
Daniel Veillarda82b1822004-11-08 16:24:57 +00002753 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002754 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002755 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002756
Daniel Veillard0161e632008-08-28 15:36:32 +00002757 if (((ctxt->depth > 40) &&
2758 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2759 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002760 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002761 return(NULL);
2762 }
2763
2764 /*
2765 * allocate a translation buffer.
2766 */
2767 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002768 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002769 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002770
2771 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002772 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002773 * we are operating on already parsed values.
2774 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002775 if (str < last)
2776 c = CUR_SCHAR(str, l);
2777 else
2778 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002779 while ((c != 0) && (c != end) && /* non input consuming loop */
2780 (c != end2) && (c != end3)) {
2781
2782 if (c == 0) break;
2783 if ((c == '&') && (str[1] == '#')) {
2784 int val = xmlParseStringCharRef(ctxt, &str);
2785 if (val != 0) {
2786 COPY_BUF(0,buffer,nbchars,val);
2787 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002788 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002789 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002790 }
Owen Taylor3473f882001-02-23 17:55:21 +00002791 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2792 if (xmlParserDebugEntities)
2793 xmlGenericError(xmlGenericErrorContext,
2794 "String decoding Entity Reference: %.30s\n",
2795 str);
2796 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002797 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2798 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002799 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002800 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002801 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002802 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002803 if ((ent != NULL) &&
2804 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2805 if (ent->content != NULL) {
2806 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002807 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002808 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002809 }
Owen Taylor3473f882001-02-23 17:55:21 +00002810 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002811 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2812 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002813 }
2814 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002815 ctxt->depth++;
2816 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2817 0, 0, 0);
2818 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002819
David Drysdale69030712015-11-20 11:13:45 +08002820 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2821 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2822 goto int_error;
2823
Owen Taylor3473f882001-02-23 17:55:21 +00002824 if (rep != NULL) {
2825 current = rep;
2826 while (*current != 0) { /* non input consuming loop */
2827 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002828 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002829 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002830 goto int_error;
2831 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002832 }
2833 }
2834 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002835 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002836 }
2837 } else if (ent != NULL) {
2838 int i = xmlStrlen(ent->name);
2839 const xmlChar *cur = ent->name;
2840
2841 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002842 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002843 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002844 }
2845 for (;i > 0;i--)
2846 buffer[nbchars++] = *cur++;
2847 buffer[nbchars++] = ';';
2848 }
2849 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2850 if (xmlParserDebugEntities)
2851 xmlGenericError(xmlGenericErrorContext,
2852 "String decoding PE Reference: %.30s\n", str);
2853 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002854 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2855 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002856 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002857 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002858 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002859 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002860 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002861 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002862 }
Owen Taylor3473f882001-02-23 17:55:21 +00002863 ctxt->depth++;
2864 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2865 0, 0, 0);
2866 ctxt->depth--;
2867 if (rep != NULL) {
2868 current = rep;
2869 while (*current != 0) { /* non input consuming loop */
2870 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002871 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002872 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002873 goto int_error;
2874 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002875 }
2876 }
2877 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002878 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002879 }
2880 }
2881 } else {
2882 COPY_BUF(l,buffer,nbchars,c);
2883 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002884 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2885 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002886 }
2887 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002888 if (str < last)
2889 c = CUR_SCHAR(str, l);
2890 else
2891 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002892 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002893 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002894 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002895
2896mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002897 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002898int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002899 if (rep != NULL)
2900 xmlFree(rep);
2901 if (buffer != NULL)
2902 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002903 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002904}
2905
Daniel Veillarde57ec792003-09-10 10:50:59 +00002906/**
2907 * xmlStringDecodeEntities:
2908 * @ctxt: the parser context
2909 * @str: the input string
2910 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2911 * @end: an end marker xmlChar, 0 if none
2912 * @end2: an end marker xmlChar, 0 if none
2913 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002914 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002915 * Takes a entity string content and process to do the adequate substitutions.
2916 *
2917 * [67] Reference ::= EntityRef | CharRef
2918 *
2919 * [69] PEReference ::= '%' Name ';'
2920 *
2921 * Returns A newly allocated string with the substitution done. The caller
2922 * must deallocate it !
2923 */
2924xmlChar *
2925xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2926 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002927 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002928 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2929 end, end2, end3));
2930}
Owen Taylor3473f882001-02-23 17:55:21 +00002931
2932/************************************************************************
2933 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002934 * Commodity functions, cleanup needed ? *
2935 * *
2936 ************************************************************************/
2937
2938/**
2939 * areBlanks:
2940 * @ctxt: an XML parser context
2941 * @str: a xmlChar *
2942 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002943 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002944 *
2945 * Is this a sequence of blank chars that one can ignore ?
2946 *
2947 * Returns 1 if ignorable 0 otherwise.
2948 */
2949
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002950static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2951 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002952 int i, ret;
2953 xmlNodePtr lastChild;
2954
Daniel Veillard05c13a22001-09-09 08:38:09 +00002955 /*
2956 * Don't spend time trying to differentiate them, the same callback is
2957 * used !
2958 */
2959 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002960 return(0);
2961
Owen Taylor3473f882001-02-23 17:55:21 +00002962 /*
2963 * Check for xml:space value.
2964 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002965 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2966 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002967 return(0);
2968
2969 /*
2970 * Check that the string is made of blanks
2971 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002972 if (blank_chars == 0) {
2973 for (i = 0;i < len;i++)
2974 if (!(IS_BLANK_CH(str[i]))) return(0);
2975 }
Owen Taylor3473f882001-02-23 17:55:21 +00002976
2977 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002978 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002979 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002980 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002981 if (ctxt->myDoc != NULL) {
2982 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2983 if (ret == 0) return(1);
2984 if (ret == 1) return(0);
2985 }
2986
2987 /*
2988 * Otherwise, heuristic :-\
2989 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002990 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002991 if ((ctxt->node->children == NULL) &&
2992 (RAW == '<') && (NXT(1) == '/')) return(0);
2993
2994 lastChild = xmlGetLastChild(ctxt->node);
2995 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002996 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2997 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002998 } else if (xmlNodeIsText(lastChild))
2999 return(0);
3000 else if ((ctxt->node->children != NULL) &&
3001 (xmlNodeIsText(ctxt->node->children)))
3002 return(0);
3003 return(1);
3004}
3005
Owen Taylor3473f882001-02-23 17:55:21 +00003006/************************************************************************
3007 * *
3008 * Extra stuff for namespace support *
3009 * Relates to http://www.w3.org/TR/WD-xml-names *
3010 * *
3011 ************************************************************************/
3012
3013/**
3014 * xmlSplitQName:
3015 * @ctxt: an XML parser context
3016 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003017 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00003018 *
3019 * parse an UTF8 encoded XML qualified name string
3020 *
3021 * [NS 5] QName ::= (Prefix ':')? LocalPart
3022 *
3023 * [NS 6] Prefix ::= NCName
3024 *
3025 * [NS 7] LocalPart ::= NCName
3026 *
3027 * Returns the local part, and prefix is updated
3028 * to get the Prefix if any.
3029 */
3030
3031xmlChar *
3032xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3033 xmlChar buf[XML_MAX_NAMELEN + 5];
3034 xmlChar *buffer = NULL;
3035 int len = 0;
3036 int max = XML_MAX_NAMELEN;
3037 xmlChar *ret = NULL;
3038 const xmlChar *cur = name;
3039 int c;
3040
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003041 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003042 *prefix = NULL;
3043
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00003044 if (cur == NULL) return(NULL);
3045
Owen Taylor3473f882001-02-23 17:55:21 +00003046#ifndef XML_XML_NAMESPACE
3047 /* xml: prefix is not really a namespace */
3048 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3049 (cur[2] == 'l') && (cur[3] == ':'))
3050 return(xmlStrdup(name));
3051#endif
3052
Daniel Veillard597bc482003-07-24 16:08:28 +00003053 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00003054 if (cur[0] == ':')
3055 return(xmlStrdup(name));
3056
3057 c = *cur++;
3058 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3059 buf[len++] = c;
3060 c = *cur++;
3061 }
3062 if (len >= max) {
3063 /*
3064 * Okay someone managed to make a huge name, so he's ready to pay
3065 * for the processing speed.
3066 */
3067 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003068
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003069 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003070 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003071 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003072 return(NULL);
3073 }
3074 memcpy(buffer, buf, len);
3075 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3076 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003077 xmlChar *tmp;
3078
Owen Taylor3473f882001-02-23 17:55:21 +00003079 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003080 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003081 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003082 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003083 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003084 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003085 return(NULL);
3086 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003087 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003088 }
3089 buffer[len++] = c;
3090 c = *cur++;
3091 }
3092 buffer[len] = 0;
3093 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003094
Daniel Veillard597bc482003-07-24 16:08:28 +00003095 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003096 if (buffer != NULL)
3097 xmlFree(buffer);
3098 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003099 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003100 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003101
Owen Taylor3473f882001-02-23 17:55:21 +00003102 if (buffer == NULL)
3103 ret = xmlStrndup(buf, len);
3104 else {
3105 ret = buffer;
3106 buffer = NULL;
3107 max = XML_MAX_NAMELEN;
3108 }
3109
3110
3111 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003112 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003113 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003114 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003115 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003116 }
Owen Taylor3473f882001-02-23 17:55:21 +00003117 len = 0;
3118
Daniel Veillardbb284f42002-10-16 18:02:47 +00003119 /*
3120 * Check that the first character is proper to start
3121 * a new name
3122 */
3123 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3124 ((c >= 0x41) && (c <= 0x5A)) ||
3125 (c == '_') || (c == ':'))) {
3126 int l;
3127 int first = CUR_SCHAR(cur, l);
3128
3129 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003130 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003131 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003132 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003133 }
3134 }
3135 cur++;
3136
Owen Taylor3473f882001-02-23 17:55:21 +00003137 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3138 buf[len++] = c;
3139 c = *cur++;
3140 }
3141 if (len >= max) {
3142 /*
3143 * Okay someone managed to make a huge name, so he's ready to pay
3144 * for the processing speed.
3145 */
3146 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003147
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003148 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003149 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003150 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003151 return(NULL);
3152 }
3153 memcpy(buffer, buf, len);
3154 while (c != 0) { /* tested bigname2.xml */
3155 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003156 xmlChar *tmp;
3157
Owen Taylor3473f882001-02-23 17:55:21 +00003158 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003159 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003160 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003161 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003162 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003163 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003164 return(NULL);
3165 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003166 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003167 }
3168 buffer[len++] = c;
3169 c = *cur++;
3170 }
3171 buffer[len] = 0;
3172 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003173
Owen Taylor3473f882001-02-23 17:55:21 +00003174 if (buffer == NULL)
3175 ret = xmlStrndup(buf, len);
3176 else {
3177 ret = buffer;
3178 }
3179 }
3180
3181 return(ret);
3182}
3183
3184/************************************************************************
3185 * *
3186 * The parser itself *
3187 * Relates to http://www.w3.org/TR/REC-xml *
3188 * *
3189 ************************************************************************/
3190
Daniel Veillard34e3f642008-07-29 09:02:27 +00003191/************************************************************************
3192 * *
3193 * Routines to parse Name, NCName and NmToken *
3194 * *
3195 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines; each one is incremented
 * at the top of the routine it is named after. Only compiled in when
 * DEBUG is defined.
 */
static unsigned long nbParseName = 0, nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0, nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3204
Daniel Veillard34e3f642008-07-29 09:02:27 +00003205/*
3206 * The two following functions are related to the change of accepted
3207 * characters for Name and NmToken in the Revision 5 of XML-1.0
3208 * They correspond to the modified production [4] and the new production [4a]
3209 * changes in that revision. Also note that the macros used for the
3210 * productions Letter, Digit, CombiningChar and Extender are not needed
3211 * anymore.
3212 * We still keep compatibility to pre-revision5 parsing semantic if the
3213 * new XML_PARSE_OLD10 option is given to the parser.
3214 */
3215static int
3216xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3217 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3218 /*
3219 * Use the new checks of production [4] [4a] amd [5] of the
3220 * Update 5 of XML-1.0
3221 */
3222 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3223 (((c >= 'a') && (c <= 'z')) ||
3224 ((c >= 'A') && (c <= 'Z')) ||
3225 (c == '_') || (c == ':') ||
3226 ((c >= 0xC0) && (c <= 0xD6)) ||
3227 ((c >= 0xD8) && (c <= 0xF6)) ||
3228 ((c >= 0xF8) && (c <= 0x2FF)) ||
3229 ((c >= 0x370) && (c <= 0x37D)) ||
3230 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3231 ((c >= 0x200C) && (c <= 0x200D)) ||
3232 ((c >= 0x2070) && (c <= 0x218F)) ||
3233 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3234 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3235 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3236 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3237 ((c >= 0x10000) && (c <= 0xEFFFF))))
3238 return(1);
3239 } else {
3240 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3241 return(1);
3242 }
3243 return(0);
3244}
3245
3246static int
3247xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3248 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3249 /*
3250 * Use the new checks of production [4] [4a] amd [5] of the
3251 * Update 5 of XML-1.0
3252 */
3253 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3254 (((c >= 'a') && (c <= 'z')) ||
3255 ((c >= 'A') && (c <= 'Z')) ||
3256 ((c >= '0') && (c <= '9')) || /* !start */
3257 (c == '_') || (c == ':') ||
3258 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3259 ((c >= 0xC0) && (c <= 0xD6)) ||
3260 ((c >= 0xD8) && (c <= 0xF6)) ||
3261 ((c >= 0xF8) && (c <= 0x2FF)) ||
3262 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3263 ((c >= 0x370) && (c <= 0x37D)) ||
3264 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3265 ((c >= 0x200C) && (c <= 0x200D)) ||
3266 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3267 ((c >= 0x2070) && (c <= 0x218F)) ||
3268 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3269 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3270 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3271 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3272 ((c >= 0x10000) && (c <= 0xEFFFF))))
3273 return(1);
3274 } else {
3275 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3276 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003277 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003278 (IS_COMBINING(c)) ||
3279 (IS_EXTENDER(c)))
3280 return(1);
3281 }
3282 return(0);
3283}
3284
Daniel Veillarde57ec792003-09-10 10:50:59 +00003285static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003286 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003287
Daniel Veillard34e3f642008-07-29 09:02:27 +00003288static const xmlChar *
3289xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3290 int len = 0, l;
3291 int c;
3292 int count = 0;
3293
Daniel Veillardc6561462009-03-25 10:22:31 +00003294#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003295 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003296#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003297
3298 /*
3299 * Handler for more complex cases
3300 */
3301 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003302 if (ctxt->instate == XML_PARSER_EOF)
3303 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003304 c = CUR_CHAR(l);
3305 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3306 /*
3307 * Use the new checks of production [4] [4a] amd [5] of the
3308 * Update 5 of XML-1.0
3309 */
3310 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3311 (!(((c >= 'a') && (c <= 'z')) ||
3312 ((c >= 'A') && (c <= 'Z')) ||
3313 (c == '_') || (c == ':') ||
3314 ((c >= 0xC0) && (c <= 0xD6)) ||
3315 ((c >= 0xD8) && (c <= 0xF6)) ||
3316 ((c >= 0xF8) && (c <= 0x2FF)) ||
3317 ((c >= 0x370) && (c <= 0x37D)) ||
3318 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3319 ((c >= 0x200C) && (c <= 0x200D)) ||
3320 ((c >= 0x2070) && (c <= 0x218F)) ||
3321 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3322 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3323 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3324 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3325 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3326 return(NULL);
3327 }
3328 len += l;
3329 NEXTL(l);
3330 c = CUR_CHAR(l);
3331 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3332 (((c >= 'a') && (c <= 'z')) ||
3333 ((c >= 'A') && (c <= 'Z')) ||
3334 ((c >= '0') && (c <= '9')) || /* !start */
3335 (c == '_') || (c == ':') ||
3336 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3337 ((c >= 0xC0) && (c <= 0xD6)) ||
3338 ((c >= 0xD8) && (c <= 0xF6)) ||
3339 ((c >= 0xF8) && (c <= 0x2FF)) ||
3340 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3341 ((c >= 0x370) && (c <= 0x37D)) ||
3342 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3343 ((c >= 0x200C) && (c <= 0x200D)) ||
3344 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3345 ((c >= 0x2070) && (c <= 0x218F)) ||
3346 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3347 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3348 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3349 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3350 ((c >= 0x10000) && (c <= 0xEFFFF))
3351 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003352 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003353 count = 0;
3354 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003355 if (ctxt->instate == XML_PARSER_EOF)
3356 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003357 }
3358 len += l;
3359 NEXTL(l);
3360 c = CUR_CHAR(l);
3361 }
3362 } else {
3363 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3364 (!IS_LETTER(c) && (c != '_') &&
3365 (c != ':'))) {
3366 return(NULL);
3367 }
3368 len += l;
3369 NEXTL(l);
3370 c = CUR_CHAR(l);
3371
3372 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3373 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3374 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003375 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003376 (IS_COMBINING(c)) ||
3377 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003378 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003379 count = 0;
3380 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003381 if (ctxt->instate == XML_PARSER_EOF)
3382 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383 }
3384 len += l;
3385 NEXTL(l);
3386 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003387 if (c == 0) {
3388 count = 0;
3389 GROW;
3390 if (ctxt->instate == XML_PARSER_EOF)
3391 return(NULL);
3392 c = CUR_CHAR(l);
3393 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003394 }
3395 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003396 if ((len > XML_MAX_NAME_LENGTH) &&
3397 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3398 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3399 return(NULL);
3400 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3402 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3403 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3404}
3405
Owen Taylor3473f882001-02-23 17:55:21 +00003406/**
3407 * xmlParseName:
3408 * @ctxt: an XML parser context
3409 *
3410 * parse an XML name.
3411 *
3412 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3413 * CombiningChar | Extender
3414 *
3415 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3416 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003417 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003418 *
3419 * Returns the Name parsed or NULL
3420 */
3421
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003422const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003423xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003424 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003425 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003426 int count = 0;
3427
3428 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003429
Daniel Veillardc6561462009-03-25 10:22:31 +00003430#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003431 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003432#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003433
Daniel Veillard48b2f892001-02-25 16:11:03 +00003434 /*
3435 * Accelerator for simple ASCII names
3436 */
3437 in = ctxt->input->cur;
3438 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3439 ((*in >= 0x41) && (*in <= 0x5A)) ||
3440 (*in == '_') || (*in == ':')) {
3441 in++;
3442 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3443 ((*in >= 0x41) && (*in <= 0x5A)) ||
3444 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003445 (*in == '_') || (*in == '-') ||
3446 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003447 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003448 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003449 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003450 if ((count > XML_MAX_NAME_LENGTH) &&
3451 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3452 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3453 return(NULL);
3454 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003455 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003456 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003457 ctxt->nbChars += count;
3458 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003459 if (ret == NULL)
3460 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003461 return(ret);
3462 }
3463 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003464 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003465 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003466}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003467
Daniel Veillard34e3f642008-07-29 09:02:27 +00003468static const xmlChar *
3469xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3470 int len = 0, l;
3471 int c;
3472 int count = 0;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003473 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
Daniel Veillard34e3f642008-07-29 09:02:27 +00003474
Daniel Veillardc6561462009-03-25 10:22:31 +00003475#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003476 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003477#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478
3479 /*
3480 * Handler for more complex cases
3481 */
3482 GROW;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003483 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003484 c = CUR_CHAR(l);
3485 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3486 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3487 return(NULL);
3488 }
3489
3490 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3491 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003492 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003493 if ((len > XML_MAX_NAME_LENGTH) &&
3494 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3495 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3496 return(NULL);
3497 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003498 count = 0;
3499 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003500 if (ctxt->instate == XML_PARSER_EOF)
3501 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003502 }
3503 len += l;
3504 NEXTL(l);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003505 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003506 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003507 if (c == 0) {
3508 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003509 /*
3510 * when shrinking to extend the buffer we really need to preserve
3511 * the part of the name we already parsed. Hence rolling back
3512 * by current lenght.
3513 */
3514 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003515 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003516 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003517 if (ctxt->instate == XML_PARSER_EOF)
3518 return(NULL);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003519 end = ctxt->input->cur;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003520 c = CUR_CHAR(l);
3521 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003522 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003523 if ((len > XML_MAX_NAME_LENGTH) &&
3524 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3525 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3526 return(NULL);
3527 }
Daniel Veillarddcc19502013-05-22 22:56:45 +02003528 return(xmlDictLookup(ctxt->dict, end - len, len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003529}
3530
3531/**
3532 * xmlParseNCName:
3533 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003534 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003535 *
3536 * parse an XML name.
3537 *
3538 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3539 * CombiningChar | Extender
3540 *
3541 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3542 *
3543 * Returns the Name parsed or NULL
3544 */
3545
3546static const xmlChar *
3547xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003548 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003549 const xmlChar *ret;
3550 int count = 0;
3551
Daniel Veillardc6561462009-03-25 10:22:31 +00003552#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003553 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003554#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003555
3556 /*
3557 * Accelerator for simple ASCII names
3558 */
3559 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003560 e = ctxt->input->end;
3561 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3562 ((*in >= 0x41) && (*in <= 0x5A)) ||
3563 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003564 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003565 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3566 ((*in >= 0x41) && (*in <= 0x5A)) ||
3567 ((*in >= 0x30) && (*in <= 0x39)) ||
3568 (*in == '_') || (*in == '-') ||
3569 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003570 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003571 if (in >= e)
3572 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003573 if ((*in > 0) && (*in < 0x80)) {
3574 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003575 if ((count > XML_MAX_NAME_LENGTH) &&
3576 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3577 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3578 return(NULL);
3579 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003580 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3581 ctxt->input->cur = in;
3582 ctxt->nbChars += count;
3583 ctxt->input->col += count;
3584 if (ret == NULL) {
3585 xmlErrMemory(ctxt, NULL);
3586 }
3587 return(ret);
3588 }
3589 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003590complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003591 return(xmlParseNCNameComplex(ctxt));
3592}
3593
Daniel Veillard46de64e2002-05-29 08:21:33 +00003594/**
3595 * xmlParseNameAndCompare:
3596 * @ctxt: an XML parser context
3597 *
3598 * parse an XML name and compares for match
3599 * (specialized for endtag parsing)
3600 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003601 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3602 * and the name for mismatch
3603 */
3604
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003605static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003606xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003607 register const xmlChar *cmp = other;
3608 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003609 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003610
3611 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003612 if (ctxt->instate == XML_PARSER_EOF)
3613 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003614
Daniel Veillard46de64e2002-05-29 08:21:33 +00003615 in = ctxt->input->cur;
3616 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003617 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003618 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003619 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003620 }
William M. Brack76e95df2003-10-18 16:20:14 +00003621 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003622 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003623 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003624 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003625 }
3626 /* failure (or end of input buffer), check with full function */
3627 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003628 /* strings coming from the dictionnary direct compare possible */
3629 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003630 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003631 }
3632 return ret;
3633}
3634
Owen Taylor3473f882001-02-23 17:55:21 +00003635/**
3636 * xmlParseStringName:
3637 * @ctxt: an XML parser context
3638 * @str: a pointer to the string pointer (IN/OUT)
3639 *
3640 * parse an XML name.
3641 *
3642 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3643 * CombiningChar | Extender
3644 *
3645 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3646 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003647 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003648 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003649 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003650 * is updated to the current location in the string.
3651 */
3652
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003653static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003654xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3655 xmlChar buf[XML_MAX_NAMELEN + 5];
3656 const xmlChar *cur = *str;
3657 int len = 0, l;
3658 int c;
3659
Daniel Veillardc6561462009-03-25 10:22:31 +00003660#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003661 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003662#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003663
Owen Taylor3473f882001-02-23 17:55:21 +00003664 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003665 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003666 return(NULL);
3667 }
3668
Daniel Veillard34e3f642008-07-29 09:02:27 +00003669 COPY_BUF(l,buf,len,c);
3670 cur += l;
3671 c = CUR_SCHAR(cur, l);
3672 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003673 COPY_BUF(l,buf,len,c);
3674 cur += l;
3675 c = CUR_SCHAR(cur, l);
3676 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3677 /*
3678 * Okay someone managed to make a huge name, so he's ready to pay
3679 * for the processing speed.
3680 */
3681 xmlChar *buffer;
3682 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003683
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003684 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003685 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003686 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003687 return(NULL);
3688 }
3689 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003690 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003691 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003692 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003693
3694 if ((len > XML_MAX_NAME_LENGTH) &&
3695 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3696 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3697 xmlFree(buffer);
3698 return(NULL);
3699 }
Owen Taylor3473f882001-02-23 17:55:21 +00003700 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003701 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003702 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003703 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003704 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003705 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003706 return(NULL);
3707 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003708 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003709 }
3710 COPY_BUF(l,buffer,len,c);
3711 cur += l;
3712 c = CUR_SCHAR(cur, l);
3713 }
3714 buffer[len] = 0;
3715 *str = cur;
3716 return(buffer);
3717 }
3718 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003719 if ((len > XML_MAX_NAME_LENGTH) &&
3720 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3721 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3722 return(NULL);
3723 }
Owen Taylor3473f882001-02-23 17:55:21 +00003724 *str = cur;
3725 return(xmlStrndup(buf, len));
3726}
3727
3728/**
3729 * xmlParseNmtoken:
3730 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003731 *
Owen Taylor3473f882001-02-23 17:55:21 +00003732 * parse an XML Nmtoken.
3733 *
3734 * [7] Nmtoken ::= (NameChar)+
3735 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003736 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003737 *
3738 * Returns the Nmtoken parsed or NULL
3739 */
3740
3741xmlChar *
3742xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3743 xmlChar buf[XML_MAX_NAMELEN + 5];
3744 int len = 0, l;
3745 int c;
3746 int count = 0;
3747
Daniel Veillardc6561462009-03-25 10:22:31 +00003748#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003749 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003750#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003751
Owen Taylor3473f882001-02-23 17:55:21 +00003752 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003753 if (ctxt->instate == XML_PARSER_EOF)
3754 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003755 c = CUR_CHAR(l);
3756
Daniel Veillard34e3f642008-07-29 09:02:27 +00003757 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003758 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003759 count = 0;
3760 GROW;
3761 }
3762 COPY_BUF(l,buf,len,c);
3763 NEXTL(l);
3764 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003765 if (c == 0) {
3766 count = 0;
3767 GROW;
3768 if (ctxt->instate == XML_PARSER_EOF)
3769 return(NULL);
3770 c = CUR_CHAR(l);
3771 }
Owen Taylor3473f882001-02-23 17:55:21 +00003772 if (len >= XML_MAX_NAMELEN) {
3773 /*
3774 * Okay someone managed to make a huge token, so he's ready to pay
3775 * for the processing speed.
3776 */
3777 xmlChar *buffer;
3778 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003779
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003780 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003781 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003782 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 return(NULL);
3784 }
3785 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003786 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003787 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003788 count = 0;
3789 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003790 if (ctxt->instate == XML_PARSER_EOF) {
3791 xmlFree(buffer);
3792 return(NULL);
3793 }
Owen Taylor3473f882001-02-23 17:55:21 +00003794 }
3795 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003796 xmlChar *tmp;
3797
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003798 if ((max > XML_MAX_NAME_LENGTH) &&
3799 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3800 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3801 xmlFree(buffer);
3802 return(NULL);
3803 }
Owen Taylor3473f882001-02-23 17:55:21 +00003804 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003805 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003806 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003807 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003808 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003809 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003810 return(NULL);
3811 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003812 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003813 }
3814 COPY_BUF(l,buffer,len,c);
3815 NEXTL(l);
3816 c = CUR_CHAR(l);
3817 }
3818 buffer[len] = 0;
3819 return(buffer);
3820 }
3821 }
3822 if (len == 0)
3823 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003824 if ((len > XML_MAX_NAME_LENGTH) &&
3825 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3826 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3827 return(NULL);
3828 }
Owen Taylor3473f882001-02-23 17:55:21 +00003829 return(xmlStrndup(buf, len));
3830}
3831
3832/**
3833 * xmlParseEntityValue:
3834 * @ctxt: an XML parser context
3835 * @orig: if non-NULL store a copy of the original entity value
3836 *
3837 * parse a value for ENTITY declarations
3838 *
3839 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3840 * "'" ([^%&'] | PEReference | Reference)* "'"
3841 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003842 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003843 */
3844
3845xmlChar *
3846xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3847 xmlChar *buf = NULL;
3848 int len = 0;
3849 int size = XML_PARSER_BUFFER_SIZE;
3850 int c, l;
3851 xmlChar stop;
3852 xmlChar *ret = NULL;
3853 const xmlChar *cur = NULL;
3854 xmlParserInputPtr input;
3855
3856 if (RAW == '"') stop = '"';
3857 else if (RAW == '\'') stop = '\'';
3858 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003859 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003860 return(NULL);
3861 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003862 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003863 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003864 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003865 return(NULL);
3866 }
3867
3868 /*
3869 * The content of the entity definition is copied in a buffer.
3870 */
3871
3872 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3873 input = ctxt->input;
3874 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003875 if (ctxt->instate == XML_PARSER_EOF) {
3876 xmlFree(buf);
3877 return(NULL);
3878 }
Owen Taylor3473f882001-02-23 17:55:21 +00003879 NEXT;
3880 c = CUR_CHAR(l);
3881 /*
3882 * NOTE: 4.4.5 Included in Literal
3883 * When a parameter entity reference appears in a literal entity
3884 * value, ... a single or double quote character in the replacement
3885 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003886 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003887 * In practice it means we stop the loop only when back at parsing
3888 * the initial entity and the quote is found
3889 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003890 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3891 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003892 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003893 xmlChar *tmp;
3894
Owen Taylor3473f882001-02-23 17:55:21 +00003895 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003896 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3897 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003898 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003899 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003900 return(NULL);
3901 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003902 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003903 }
3904 COPY_BUF(l,buf,len,c);
3905 NEXTL(l);
3906 /*
3907 * Pop-up of finished entities.
3908 */
3909 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3910 xmlPopInput(ctxt);
3911
3912 GROW;
3913 c = CUR_CHAR(l);
3914 if (c == 0) {
3915 GROW;
3916 c = CUR_CHAR(l);
3917 }
3918 }
3919 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003920 if (ctxt->instate == XML_PARSER_EOF) {
3921 xmlFree(buf);
3922 return(NULL);
3923 }
Owen Taylor3473f882001-02-23 17:55:21 +00003924
3925 /*
3926 * Raise problem w.r.t. '&' and '%' being used in non-entities
3927 * reference constructs. Note Charref will be handled in
3928 * xmlStringDecodeEntities()
3929 */
3930 cur = buf;
3931 while (*cur != 0) { /* non input consuming */
3932 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3933 xmlChar *name;
3934 xmlChar tmp = *cur;
3935
3936 cur++;
3937 name = xmlParseStringName(ctxt, &cur);
3938 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003939 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003940 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003941 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003942 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003943 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3944 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003945 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003946 }
3947 if (name != NULL)
3948 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003949 if (*cur == 0)
3950 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003951 }
3952 cur++;
3953 }
3954
3955 /*
3956 * Then PEReference entities are substituted.
3957 */
3958 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003959 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003960 xmlFree(buf);
3961 } else {
3962 NEXT;
3963 /*
3964 * NOTE: 4.4.7 Bypassed
3965 * When a general entity reference appears in the EntityValue in
3966 * an entity declaration, it is bypassed and left as is.
3967 * so XML_SUBSTITUTE_REF is not set here.
3968 */
3969 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3970 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003971 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003972 *orig = buf;
3973 else
3974 xmlFree(buf);
3975 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003976
Owen Taylor3473f882001-02-23 17:55:21 +00003977 return(ret);
3978}
3979
3980/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003981 * xmlParseAttValueComplex:
3982 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003983 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003984 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003985 *
3986 * parse a value for an attribute, this is the fallback function
3987 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003988 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003989 *
3990 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3991 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003992static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003993xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003994 xmlChar limit = 0;
3995 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003996 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003997 size_t len = 0;
3998 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003999 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004000 xmlChar *current = NULL;
4001 xmlEntityPtr ent;
4002
Owen Taylor3473f882001-02-23 17:55:21 +00004003 if (NXT(0) == '"') {
4004 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4005 limit = '"';
4006 NEXT;
4007 } else if (NXT(0) == '\'') {
4008 limit = '\'';
4009 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4010 NEXT;
4011 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004012 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004013 return(NULL);
4014 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00004015
Owen Taylor3473f882001-02-23 17:55:21 +00004016 /*
4017 * allocate a translation buffer.
4018 */
4019 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004020 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004021 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00004022
4023 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004024 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00004025 */
4026 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004027 while (((NXT(0) != limit) && /* checked */
4028 (IS_CHAR(c)) && (c != '<')) &&
4029 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08004030 /*
4031 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4032 * special option is given
4033 */
4034 if ((len > XML_MAX_TEXT_LENGTH) &&
4035 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4036 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004037 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08004038 goto mem_error;
4039 }
Owen Taylor3473f882001-02-23 17:55:21 +00004040 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00004041 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00004042 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004043 if (NXT(1) == '#') {
4044 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004045
Owen Taylor3473f882001-02-23 17:55:21 +00004046 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00004047 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004048 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004049 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004050 }
4051 buf[len++] = '&';
4052 } else {
4053 /*
4054 * The reparsing will be done in xmlStringGetNodeList()
4055 * called by the attribute() function in SAX.c
4056 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004057 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004058 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004059 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004060 buf[len++] = '&';
4061 buf[len++] = '#';
4062 buf[len++] = '3';
4063 buf[len++] = '8';
4064 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004065 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004066 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004067 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004068 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004069 }
Owen Taylor3473f882001-02-23 17:55:21 +00004070 len += xmlCopyChar(0, &buf[len], val);
4071 }
4072 } else {
4073 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004074 ctxt->nbentities++;
4075 if (ent != NULL)
4076 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004077 if ((ent != NULL) &&
4078 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004079 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004080 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004081 }
4082 if ((ctxt->replaceEntities == 0) &&
4083 (ent->content[0] == '&')) {
4084 buf[len++] = '&';
4085 buf[len++] = '#';
4086 buf[len++] = '3';
4087 buf[len++] = '8';
4088 buf[len++] = ';';
4089 } else {
4090 buf[len++] = ent->content[0];
4091 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004092 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004093 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004094 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4095 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004096 XML_SUBSTITUTE_REF,
4097 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004098 if (rep != NULL) {
4099 current = rep;
4100 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004101 if ((*current == 0xD) || (*current == 0xA) ||
4102 (*current == 0x9)) {
4103 buf[len++] = 0x20;
4104 current++;
4105 } else
4106 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004107 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004108 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004109 }
4110 }
4111 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004112 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004113 }
4114 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004115 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004116 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004117 }
Owen Taylor3473f882001-02-23 17:55:21 +00004118 if (ent->content != NULL)
4119 buf[len++] = ent->content[0];
4120 }
4121 } else if (ent != NULL) {
4122 int i = xmlStrlen(ent->name);
4123 const xmlChar *cur = ent->name;
4124
4125 /*
4126 * This may look absurd but is needed to detect
4127 * entities problems
4128 */
4129 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004130 (ent->content != NULL) && (ent->checked == 0)) {
4131 unsigned long oldnbent = ctxt->nbentities;
4132
Owen Taylor3473f882001-02-23 17:55:21 +00004133 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004134 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004135
Daniel Veillardcff25462013-03-11 15:57:55 +08004136 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004137 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004138 if (xmlStrchr(rep, '<'))
4139 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004140 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004141 rep = NULL;
4142 }
Owen Taylor3473f882001-02-23 17:55:21 +00004143 }
4144
4145 /*
4146 * Just output the reference
4147 */
4148 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004149 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004150 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004151 }
4152 for (;i > 0;i--)
4153 buf[len++] = *cur++;
4154 buf[len++] = ';';
4155 }
4156 }
4157 } else {
4158 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004159 if ((len != 0) || (!normalize)) {
4160 if ((!normalize) || (!in_space)) {
4161 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004162 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004163 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004164 }
4165 }
4166 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004167 }
4168 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004169 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004170 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004171 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004172 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004173 }
4174 }
4175 NEXTL(l);
4176 }
4177 GROW;
4178 c = CUR_CHAR(l);
4179 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004180 if (ctxt->instate == XML_PARSER_EOF)
4181 goto error;
4182
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004183 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004184 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004185 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004186 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004187 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004188 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004189 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004190 if ((c != 0) && (!IS_CHAR(c))) {
4191 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4192 "invalid character in attribute value\n");
4193 } else {
4194 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4195 "AttValue: ' expected\n");
4196 }
Owen Taylor3473f882001-02-23 17:55:21 +00004197 } else
4198 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004199
4200 /*
4201 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004202 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004203 */
4204 if (len >= INT_MAX) {
4205 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004206 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004207 goto mem_error;
4208 }
4209
4210 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004211 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004212
4213mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004214 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004215error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004216 if (buf != NULL)
4217 xmlFree(buf);
4218 if (rep != NULL)
4219 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004220 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004221}
4222
4223/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004224 * xmlParseAttValue:
4225 * @ctxt: an XML parser context
4226 *
4227 * parse a value for an attribute
4228 * Note: the parser won't do substitution of entities here, this
4229 * will be handled later in xmlStringGetNodeList
4230 *
4231 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4232 * "'" ([^<&'] | Reference)* "'"
4233 *
4234 * 3.3.3 Attribute-Value Normalization:
4235 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004236 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004237 * - a character reference is processed by appending the referenced
4238 * character to the attribute value
4239 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004240 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004241 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4242 * appending #x20 to the normalized value, except that only a single
4243 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004244 * parsed entity or the literal entity value of an internal parsed entity
4245 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004246 * If the declared value is not CDATA, then the XML processor must further
4247 * process the normalized attribute value by discarding any leading and
4248 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004249 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004250 * All attributes for which no declaration has been read should be treated
4251 * by a non-validating parser as if declared CDATA.
4252 *
4253 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4254 */
4255
4256
4257xmlChar *
4258xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004259 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004260 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004261}
4262
4263/**
Owen Taylor3473f882001-02-23 17:55:21 +00004264 * xmlParseSystemLiteral:
4265 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004266 *
Owen Taylor3473f882001-02-23 17:55:21 +00004267 * parse an XML Literal
4268 *
4269 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4270 *
4271 * Returns the SystemLiteral parsed or NULL
4272 */
4273
4274xmlChar *
4275xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4276 xmlChar *buf = NULL;
4277 int len = 0;
4278 int size = XML_PARSER_BUFFER_SIZE;
4279 int cur, l;
4280 xmlChar stop;
4281 int state = ctxt->instate;
4282 int count = 0;
4283
4284 SHRINK;
4285 if (RAW == '"') {
4286 NEXT;
4287 stop = '"';
4288 } else if (RAW == '\'') {
4289 NEXT;
4290 stop = '\'';
4291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004293 return(NULL);
4294 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004295
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004296 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004297 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004298 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004299 return(NULL);
4300 }
4301 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4302 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004303 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004304 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004305 xmlChar *tmp;
4306
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004307 if ((size > XML_MAX_NAME_LENGTH) &&
4308 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4309 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4310 xmlFree(buf);
4311 ctxt->instate = (xmlParserInputState) state;
4312 return(NULL);
4313 }
Owen Taylor3473f882001-02-23 17:55:21 +00004314 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004315 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4316 if (tmp == NULL) {
4317 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004318 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004319 ctxt->instate = (xmlParserInputState) state;
4320 return(NULL);
4321 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004322 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
4324 count++;
4325 if (count > 50) {
4326 GROW;
4327 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004328 if (ctxt->instate == XML_PARSER_EOF) {
4329 xmlFree(buf);
4330 return(NULL);
4331 }
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
4333 COPY_BUF(l,buf,len,cur);
4334 NEXTL(l);
4335 cur = CUR_CHAR(l);
4336 if (cur == 0) {
4337 GROW;
4338 SHRINK;
4339 cur = CUR_CHAR(l);
4340 }
4341 }
4342 buf[len] = 0;
4343 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004344 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004345 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004346 } else {
4347 NEXT;
4348 }
4349 return(buf);
4350}
4351
4352/**
4353 * xmlParsePubidLiteral:
4354 * @ctxt: an XML parser context
4355 *
4356 * parse an XML public literal
4357 *
4358 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4359 *
4360 * Returns the PubidLiteral parsed or NULL.
4361 */
4362
4363xmlChar *
4364xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4365 xmlChar *buf = NULL;
4366 int len = 0;
4367 int size = XML_PARSER_BUFFER_SIZE;
4368 xmlChar cur;
4369 xmlChar stop;
4370 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004371 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004372
4373 SHRINK;
4374 if (RAW == '"') {
4375 NEXT;
4376 stop = '"';
4377 } else if (RAW == '\'') {
4378 NEXT;
4379 stop = '\'';
4380 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004381 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004382 return(NULL);
4383 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004384 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004385 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004386 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004387 return(NULL);
4388 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004389 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004390 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004391 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004392 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004393 xmlChar *tmp;
4394
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004395 if ((size > XML_MAX_NAME_LENGTH) &&
4396 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4397 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4398 xmlFree(buf);
4399 return(NULL);
4400 }
Owen Taylor3473f882001-02-23 17:55:21 +00004401 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004402 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4403 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004404 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004405 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004406 return(NULL);
4407 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004408 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004409 }
4410 buf[len++] = cur;
4411 count++;
4412 if (count > 50) {
4413 GROW;
4414 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004415 if (ctxt->instate == XML_PARSER_EOF) {
4416 xmlFree(buf);
4417 return(NULL);
4418 }
Owen Taylor3473f882001-02-23 17:55:21 +00004419 }
4420 NEXT;
4421 cur = CUR;
4422 if (cur == 0) {
4423 GROW;
4424 SHRINK;
4425 cur = CUR;
4426 }
4427 }
4428 buf[len] = 0;
4429 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004431 } else {
4432 NEXT;
4433 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004434 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004435 return(buf);
4436}
4437
/* Forward declaration: slow-path fallback defined after xmlParseCharData(). */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing, indexed by input byte.
 *
 * A non-zero entry (equal to the byte itself) means the byte is consumed
 * by that inner loop; a zero entry makes the loop stop. The zero entries
 * are: every byte below 0x20 except TAB (so LF and CR stop the loop),
 * '&' (0x26), '<' (0x3C), ']' (0x5D), and every byte from 0x80 upwards.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 (TAB) accepted; CR/LF handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & excluded */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < excluded */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] excluded */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii: all excluded */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4477
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * Parse a CharData section.
 * If we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands runs of bytes accepted by test_char_data[] (plus line feeds and
 * lone ']') directly to the SAX characters/ignorableWhitespace callbacks.
 * Anything the fast path does not handle is delegated to
 * xmlParseCharDataComplex(), after the line/column counters have been put
 * back to the values recorded at the last delivery.
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /* position at the last point where data was delivered */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* First try to consume a run made only of spaces and LFs. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /* Only spaces/LFs before the next markup: deliver and stop. */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /* distinct handlers: areBlanks() picks which one */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* General run: advance over every byte test_char_data[] accepts. */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* ']]>' is an error outside CDATA; a lone ']' is plain data. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /* run starts with a blank: it may be ignorable */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * CR LF: move the cursor onto the LF so the CR is left out
                 * of the next run, and re-test the loop condition.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: step back, it is not handled by the fast path */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* reload: the cursor may have changed during SHRINK/GROW */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Fall back to the generic parser from the last delivered position. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4635
Daniel Veillard01c13b52002-12-10 15:19:08 +00004636/**
4637 * xmlParseCharDataComplex:
4638 * @ctxt: an XML parser context
4639 * @cdata: int indicating whether we are within a CDATA section
4640 *
4641 * parse a CharData section.this is the fallback function
4642 * of xmlParseCharData() when the parsing requires handling
4643 * of non-ASCII characters.
4644 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004645static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004646xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004647 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4648 int nbchar = 0;
4649 int cur, l;
4650 int count = 0;
4651
4652 SHRINK;
4653 GROW;
4654 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004655 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004656 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004657 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004658 if ((cur == ']') && (NXT(1) == ']') &&
4659 (NXT(2) == '>')) {
4660 if (cdata) break;
4661 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004662 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004663 }
4664 }
4665 COPY_BUF(l,buf,nbchar,cur);
4666 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004667 buf[nbchar] = 0;
4668
Owen Taylor3473f882001-02-23 17:55:21 +00004669 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004670 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004671 */
4672 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004673 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004674 if (ctxt->sax->ignorableWhitespace != NULL)
4675 ctxt->sax->ignorableWhitespace(ctxt->userData,
4676 buf, nbchar);
4677 } else {
4678 if (ctxt->sax->characters != NULL)
4679 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004680 if ((ctxt->sax->characters !=
4681 ctxt->sax->ignorableWhitespace) &&
4682 (*ctxt->space == -1))
4683 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004684 }
4685 }
4686 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004687 /* something really bad happened in the SAX callback */
4688 if (ctxt->instate != XML_PARSER_CONTENT)
4689 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004690 }
4691 count++;
4692 if (count > 50) {
4693 GROW;
4694 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004695 if (ctxt->instate == XML_PARSER_EOF)
4696 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004697 }
4698 NEXTL(l);
4699 cur = CUR_CHAR(l);
4700 }
4701 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004702 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004704 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004705 */
4706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004707 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004708 if (ctxt->sax->ignorableWhitespace != NULL)
4709 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4710 } else {
4711 if (ctxt->sax->characters != NULL)
4712 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004713 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4714 (*ctxt->space == -1))
4715 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004716 }
4717 }
4718 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004719 if ((cur != 0) && (!IS_CHAR(cur))) {
4720 /* Generate the error and skip the offending character */
4721 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4722 "PCDATA invalid Char value %d\n",
4723 cur);
4724 NEXTL(l);
4725 }
Owen Taylor3473f882001-02-23 17:55:21 +00004726}
4727
4728/**
4729 * xmlParseExternalID:
4730 * @ctxt: an XML parser context
4731 * @publicID: a xmlChar** receiving PubidLiteral
4732 * @strict: indicate whether we should restrict parsing to only
4733 * production [75], see NOTE below
4734 *
4735 * Parse an External ID or a Public ID
4736 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004737 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004738 * 'PUBLIC' S PubidLiteral S SystemLiteral
4739 *
4740 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4741 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4742 *
4743 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4744 *
4745 * Returns the function returns SystemLiteral and in the second
4746 * case publicID receives PubidLiteral, is strict is off
4747 * it is possible to return NULL and have publicID set.
4748 */
4749
4750xmlChar *
4751xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4752 xmlChar *URI = NULL;
4753
4754 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004755
4756 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004757 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004758 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004759 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004760 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4761 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004762 }
4763 SKIP_BLANKS;
4764 URI = xmlParseSystemLiteral(ctxt);
4765 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004766 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004767 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004768 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004769 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004770 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004771 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004772 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004773 }
4774 SKIP_BLANKS;
4775 *publicID = xmlParsePubidLiteral(ctxt);
4776 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004777 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004778 }
4779 if (strict) {
4780 /*
4781 * We don't handle [83] so "S SystemLiteral" is required.
4782 */
William M. Brack76e95df2003-10-18 16:20:14 +00004783 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004784 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004785 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004786 }
4787 } else {
4788 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004789 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004790 * "S SystemLiteral" is not detected. From a purely parsing
4791 * point of view that's a nice mess.
4792 */
4793 const xmlChar *ptr;
4794 GROW;
4795
4796 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004797 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004798
William M. Brack76e95df2003-10-18 16:20:14 +00004799 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004800 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4801 }
4802 SKIP_BLANKS;
4803 URI = xmlParseSystemLiteral(ctxt);
4804 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004805 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004806 }
4807 }
4808 return(URI);
4809}
4810
4811/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004812 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004813 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004814 * @buf: the already parsed part of the buffer
4815 * @len: number of bytes filles in the buffer
4816 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004817 *
4818 * Skip an XML (SGML) comment <!-- .... -->
4819 * The spec says that "For compatibility, the string "--" (double-hyphen)
4820 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004821 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004822 *
4823 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4824 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004825static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004826xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4827 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004828 int q, ql;
4829 int r, rl;
4830 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004831 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004832 int inputid;
4833
4834 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004835
Owen Taylor3473f882001-02-23 17:55:21 +00004836 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004837 len = 0;
4838 size = XML_PARSER_BUFFER_SIZE;
4839 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4840 if (buf == NULL) {
4841 xmlErrMemory(ctxt, NULL);
4842 return;
4843 }
Owen Taylor3473f882001-02-23 17:55:21 +00004844 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004845 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004846 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004847 if (q == 0)
4848 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004849 if (!IS_CHAR(q)) {
4850 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4851 "xmlParseComment: invalid xmlChar value %d\n",
4852 q);
4853 xmlFree (buf);
4854 return;
4855 }
Owen Taylor3473f882001-02-23 17:55:21 +00004856 NEXTL(ql);
4857 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004858 if (r == 0)
4859 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004860 if (!IS_CHAR(r)) {
4861 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4862 "xmlParseComment: invalid xmlChar value %d\n",
4863 q);
4864 xmlFree (buf);
4865 return;
4866 }
Owen Taylor3473f882001-02-23 17:55:21 +00004867 NEXTL(rl);
4868 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004869 if (cur == 0)
4870 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004871 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004872 ((cur != '>') ||
4873 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004874 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004875 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004877 if ((len > XML_MAX_TEXT_LENGTH) &&
4878 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4879 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4880 "Comment too big found", NULL);
4881 xmlFree (buf);
4882 return;
4883 }
Owen Taylor3473f882001-02-23 17:55:21 +00004884 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004885 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004886 size_t new_size;
4887
4888 new_size = size * 2;
4889 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004890 if (new_buf == NULL) {
4891 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004892 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004893 return;
4894 }
William M. Bracka3215c72004-07-31 16:24:01 +00004895 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004896 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004897 }
4898 COPY_BUF(ql,buf,len,q);
4899 q = r;
4900 ql = rl;
4901 r = cur;
4902 rl = l;
4903
4904 count++;
4905 if (count > 50) {
4906 GROW;
4907 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004908 if (ctxt->instate == XML_PARSER_EOF) {
4909 xmlFree(buf);
4910 return;
4911 }
Owen Taylor3473f882001-02-23 17:55:21 +00004912 }
4913 NEXTL(l);
4914 cur = CUR_CHAR(l);
4915 if (cur == 0) {
4916 SHRINK;
4917 GROW;
4918 cur = CUR_CHAR(l);
4919 }
4920 }
4921 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004922 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004923 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004924 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004925 } else if (!IS_CHAR(cur)) {
4926 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4927 "xmlParseComment: invalid xmlChar value %d\n",
4928 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004929 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004930 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004931 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4932 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004933 }
4934 NEXT;
4935 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4936 (!ctxt->disableSAX))
4937 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004938 }
Daniel Veillardda629342007-08-01 07:49:06 +00004939 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004940 return;
4941not_terminated:
4942 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4943 "Comment not terminated\n", NULL);
4944 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004945 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004946}
Daniel Veillardda629342007-08-01 07:49:06 +00004947
Daniel Veillard4c778d82005-01-23 17:37:44 +00004948/**
4949 * xmlParseComment:
4950 * @ctxt: an XML parser context
4951 *
4952 * Skip an XML (SGML) comment <!-- .... -->
4953 * The spec says that "For compatibility, the string "--" (double-hyphen)
4954 * must not occur within comments. "
4955 *
4956 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4957 */
4958void
4959xmlParseComment(xmlParserCtxtPtr ctxt) {
4960 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004961 size_t size = XML_PARSER_BUFFER_SIZE;
4962 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004963 xmlParserInputState state;
4964 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004965 size_t nbchar = 0;
4966 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004967 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004968
4969 /*
4970 * Check that there is a comment right here.
4971 */
4972 if ((RAW != '<') || (NXT(1) != '!') ||
4973 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004974 state = ctxt->instate;
4975 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004976 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004977 SKIP(4);
4978 SHRINK;
4979 GROW;
4980
4981 /*
4982 * Accelerated common case where input don't need to be
4983 * modified before passing it to the handler.
4984 */
4985 in = ctxt->input->cur;
4986 do {
4987 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004988 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004989 ctxt->input->line++; ctxt->input->col = 1;
4990 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004991 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004992 }
4993get_more:
4994 ccol = ctxt->input->col;
4995 while (((*in > '-') && (*in <= 0x7F)) ||
4996 ((*in >= 0x20) && (*in < '-')) ||
4997 (*in == 0x09)) {
4998 in++;
4999 ccol++;
5000 }
5001 ctxt->input->col = ccol;
5002 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005003 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005004 ctxt->input->line++; ctxt->input->col = 1;
5005 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005006 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005007 goto get_more;
5008 }
5009 nbchar = in - ctxt->input->cur;
5010 /*
5011 * save current set of data
5012 */
5013 if (nbchar > 0) {
5014 if ((ctxt->sax != NULL) &&
5015 (ctxt->sax->comment != NULL)) {
5016 if (buf == NULL) {
5017 if ((*in == '-') && (in[1] == '-'))
5018 size = nbchar + 1;
5019 else
5020 size = XML_PARSER_BUFFER_SIZE + nbchar;
5021 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5022 if (buf == NULL) {
5023 xmlErrMemory(ctxt, NULL);
5024 ctxt->instate = state;
5025 return;
5026 }
5027 len = 0;
5028 } else if (len + nbchar + 1 >= size) {
5029 xmlChar *new_buf;
5030 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5031 new_buf = (xmlChar *) xmlRealloc(buf,
5032 size * sizeof(xmlChar));
5033 if (new_buf == NULL) {
5034 xmlFree (buf);
5035 xmlErrMemory(ctxt, NULL);
5036 ctxt->instate = state;
5037 return;
5038 }
5039 buf = new_buf;
5040 }
5041 memcpy(&buf[len], ctxt->input->cur, nbchar);
5042 len += nbchar;
5043 buf[len] = 0;
5044 }
5045 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08005046 if ((len > XML_MAX_TEXT_LENGTH) &&
5047 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5048 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5049 "Comment too big found", NULL);
5050 xmlFree (buf);
5051 return;
5052 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005053 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00005054 if (*in == 0xA) {
5055 in++;
5056 ctxt->input->line++; ctxt->input->col = 1;
5057 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005058 if (*in == 0xD) {
5059 in++;
5060 if (*in == 0xA) {
5061 ctxt->input->cur = in;
5062 in++;
5063 ctxt->input->line++; ctxt->input->col = 1;
5064 continue; /* while */
5065 }
5066 in--;
5067 }
5068 SHRINK;
5069 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005070 if (ctxt->instate == XML_PARSER_EOF) {
5071 xmlFree(buf);
5072 return;
5073 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005074 in = ctxt->input->cur;
5075 if (*in == '-') {
5076 if (in[1] == '-') {
5077 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005078 if (ctxt->input->id != inputid) {
5079 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5080 "comment doesn't start and stop in the same entity\n");
5081 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005082 SKIP(3);
5083 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5084 (!ctxt->disableSAX)) {
5085 if (buf != NULL)
5086 ctxt->sax->comment(ctxt->userData, buf);
5087 else
5088 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5089 }
5090 if (buf != NULL)
5091 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005092 if (ctxt->instate != XML_PARSER_EOF)
5093 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005094 return;
5095 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005096 if (buf != NULL) {
5097 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5098 "Double hyphen within comment: "
5099 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005100 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005101 } else
5102 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5103 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005104 in++;
5105 ctxt->input->col++;
5106 }
5107 in++;
5108 ctxt->input->col++;
5109 goto get_more;
5110 }
5111 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5112 xmlParseCommentComplex(ctxt, buf, len, size);
5113 ctxt->instate = state;
5114 return;
5115}
5116
Owen Taylor3473f882001-02-23 17:55:21 +00005117
5118/**
5119 * xmlParsePITarget:
5120 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005121 *
Owen Taylor3473f882001-02-23 17:55:21 +00005122 * parse the name of a PI
5123 *
5124 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5125 *
5126 * Returns the PITarget name or NULL
5127 */
5128
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005129const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005130xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005131 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005132
5133 name = xmlParseName(ctxt);
5134 if ((name != NULL) &&
5135 ((name[0] == 'x') || (name[0] == 'X')) &&
5136 ((name[1] == 'm') || (name[1] == 'M')) &&
5137 ((name[2] == 'l') || (name[2] == 'L'))) {
5138 int i;
5139 if ((name[0] == 'x') && (name[1] == 'm') &&
5140 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005141 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005142 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005143 return(name);
5144 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005145 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005146 return(name);
5147 }
5148 for (i = 0;;i++) {
5149 if (xmlW3CPIs[i] == NULL) break;
5150 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5151 return(name);
5152 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005153 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5154 "xmlParsePITarget: invalid name prefix 'xml'\n",
5155 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005156 }
Daniel Veillard37334572008-07-31 08:20:02 +00005157 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005158 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005159 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005160 }
Owen Taylor3473f882001-02-23 17:55:21 +00005161 return(name);
5162}
5163
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted URL is extracted and added to the catalogs of the parsing
 * context, to be used if a resolution is needed further down in the
 * document. Malformed content produces an XML_WAR_CATALOG_PI warning,
 * except for a missing '=' after the "catalog" keyword, which is
 * ignored without any message.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* S? 'catalog' */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* S? '=' -- anything else is dropped silently, no warning */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* S? followed by the opening quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* scan up to the matching closing quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5225
Owen Taylor3473f882001-02-23 17:55:21 +00005226/**
5227 * xmlParsePI:
5228 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005229 *
Owen Taylor3473f882001-02-23 17:55:21 +00005230 * parse an XML Processing Instruction.
5231 *
5232 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5233 *
5234 * The processing is transfered to SAX once parsed.
5235 */
5236
5237void
5238xmlParsePI(xmlParserCtxtPtr ctxt) {
5239 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005240 size_t len = 0;
5241 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005242 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005243 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005244 xmlParserInputState state;
5245 int count = 0;
5246
5247 if ((RAW == '<') && (NXT(1) == '?')) {
5248 xmlParserInputPtr input = ctxt->input;
5249 state = ctxt->instate;
5250 ctxt->instate = XML_PARSER_PI;
5251 /*
5252 * this is a Processing Instruction.
5253 */
5254 SKIP(2);
5255 SHRINK;
5256
5257 /*
5258 * Parse the target name and check for special support like
5259 * namespace.
5260 */
5261 target = xmlParsePITarget(ctxt);
5262 if (target != NULL) {
5263 if ((RAW == '?') && (NXT(1) == '>')) {
5264 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005265 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5266 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005267 }
5268 SKIP(2);
5269
5270 /*
5271 * SAX: PI detected.
5272 */
5273 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5274 (ctxt->sax->processingInstruction != NULL))
5275 ctxt->sax->processingInstruction(ctxt->userData,
5276 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005277 if (ctxt->instate != XML_PARSER_EOF)
5278 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005279 return;
5280 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005281 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005282 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005283 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005284 ctxt->instate = state;
5285 return;
5286 }
5287 cur = CUR;
5288 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005289 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5290 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005291 }
5292 SKIP_BLANKS;
5293 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005294 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005295 ((cur != '?') || (NXT(1) != '>'))) {
5296 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005297 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005298 size_t new_size = size * 2;
5299 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005300 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005301 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005302 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005303 ctxt->instate = state;
5304 return;
5305 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005306 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005307 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005308 }
5309 count++;
5310 if (count > 50) {
5311 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005312 if (ctxt->instate == XML_PARSER_EOF) {
5313 xmlFree(buf);
5314 return;
5315 }
Owen Taylor3473f882001-02-23 17:55:21 +00005316 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005317 if ((len > XML_MAX_TEXT_LENGTH) &&
5318 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5319 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5320 "PI %s too big found", target);
5321 xmlFree(buf);
5322 ctxt->instate = state;
5323 return;
5324 }
Owen Taylor3473f882001-02-23 17:55:21 +00005325 }
5326 COPY_BUF(l,buf,len,cur);
5327 NEXTL(l);
5328 cur = CUR_CHAR(l);
5329 if (cur == 0) {
5330 SHRINK;
5331 GROW;
5332 cur = CUR_CHAR(l);
5333 }
5334 }
Daniel Veillard51304812012-07-19 20:34:26 +08005335 if ((len > XML_MAX_TEXT_LENGTH) &&
5336 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5337 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5338 "PI %s too big found", target);
5339 xmlFree(buf);
5340 ctxt->instate = state;
5341 return;
5342 }
Owen Taylor3473f882001-02-23 17:55:21 +00005343 buf[len] = 0;
5344 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005345 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5346 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005347 } else {
5348 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005349 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5350 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005351 }
5352 SKIP(2);
5353
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005354#ifdef LIBXML_CATALOG_ENABLED
5355 if (((state == XML_PARSER_MISC) ||
5356 (state == XML_PARSER_START)) &&
5357 (xmlStrEqual(target, XML_CATALOG_PI))) {
5358 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5359 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5360 (allow == XML_CATA_ALLOW_ALL))
5361 xmlParseCatalogPI(ctxt, buf);
5362 }
5363#endif
5364
5365
Owen Taylor3473f882001-02-23 17:55:21 +00005366 /*
5367 * SAX: PI detected.
5368 */
5369 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5370 (ctxt->sax->processingInstruction != NULL))
5371 ctxt->sax->processingInstruction(ctxt->userData,
5372 target, buf);
5373 }
5374 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005375 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005376 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005377 }
Chris Evans77404b82011-12-14 16:18:25 +08005378 if (ctxt->instate != XML_PARSER_EOF)
5379 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005380 }
5381}
5382
5383/**
5384 * xmlParseNotationDecl:
5385 * @ctxt: an XML parser context
5386 *
5387 * parse a notation declaration
5388 *
5389 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5390 *
5391 * Hence there is actually 3 choices:
5392 * 'PUBLIC' S PubidLiteral
5393 * 'PUBLIC' S PubidLiteral S SystemLiteral
5394 * and 'SYSTEM' S SystemLiteral
5395 *
5396 * See the NOTE on xmlParseExternalID().
5397 */
5398
5399void
5400xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005401 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005402 xmlChar *Pubid;
5403 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005404
Daniel Veillarda07050d2003-10-19 14:46:32 +00005405 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005406 xmlParserInputPtr input = ctxt->input;
5407 SHRINK;
5408 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005409 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005410 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5411 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005412 return;
5413 }
5414 SKIP_BLANKS;
5415
Daniel Veillard76d66f42001-05-16 21:05:17 +00005416 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005417 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005418 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005419 return;
5420 }
William M. Brack76e95df2003-10-18 16:20:14 +00005421 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005423 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005424 return;
5425 }
Daniel Veillard37334572008-07-31 08:20:02 +00005426 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005427 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005428 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005429 name, NULL, NULL);
5430 }
Owen Taylor3473f882001-02-23 17:55:21 +00005431 SKIP_BLANKS;
5432
5433 /*
5434 * Parse the IDs.
5435 */
5436 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5437 SKIP_BLANKS;
5438
5439 if (RAW == '>') {
5440 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005441 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005443 }
5444 NEXT;
5445 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5446 (ctxt->sax->notationDecl != NULL))
5447 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5448 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005449 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005450 }
Owen Taylor3473f882001-02-23 17:55:21 +00005451 if (Systemid != NULL) xmlFree(Systemid);
5452 if (Pubid != NULL) xmlFree(Pubid);
5453 }
5454}
5455
5456/**
5457 * xmlParseEntityDecl:
5458 * @ctxt: an XML parser context
5459 *
5460 * parse <!ENTITY declarations
5461 *
5462 * [70] EntityDecl ::= GEDecl | PEDecl
5463 *
5464 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5465 *
5466 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5467 *
5468 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5469 *
5470 * [74] PEDef ::= EntityValue | ExternalID
5471 *
5472 * [76] NDataDecl ::= S 'NDATA' S Name
5473 *
5474 * [ VC: Notation Declared ]
5475 * The Name must match the declared name of a notation.
5476 */
5477
5478void
5479xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005480 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005481 xmlChar *value = NULL;
5482 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005483 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005484 int isParameter = 0;
5485 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005486 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005487
Daniel Veillard4c778d82005-01-23 17:37:44 +00005488 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005489 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005490 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005491 SHRINK;
5492 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005493 skipped = SKIP_BLANKS;
5494 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005495 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5496 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005497 }
Owen Taylor3473f882001-02-23 17:55:21 +00005498
5499 if (RAW == '%') {
5500 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005501 skipped = SKIP_BLANKS;
5502 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005503 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5504 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005505 }
Owen Taylor3473f882001-02-23 17:55:21 +00005506 isParameter = 1;
5507 }
5508
Daniel Veillard76d66f42001-05-16 21:05:17 +00005509 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005510 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005511 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5512 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005513 return;
5514 }
Daniel Veillard37334572008-07-31 08:20:02 +00005515 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005516 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005517 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005518 name, NULL, NULL);
5519 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005520 skipped = SKIP_BLANKS;
5521 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005522 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5523 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005524 }
Owen Taylor3473f882001-02-23 17:55:21 +00005525
Daniel Veillardf5582f12002-06-11 10:08:16 +00005526 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005527 /*
5528 * handle the various case of definitions...
5529 */
5530 if (isParameter) {
5531 if ((RAW == '"') || (RAW == '\'')) {
5532 value = xmlParseEntityValue(ctxt, &orig);
5533 if (value) {
5534 if ((ctxt->sax != NULL) &&
5535 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5536 ctxt->sax->entityDecl(ctxt->userData, name,
5537 XML_INTERNAL_PARAMETER_ENTITY,
5538 NULL, NULL, value);
5539 }
5540 } else {
5541 URI = xmlParseExternalID(ctxt, &literal, 1);
5542 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005543 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005544 }
5545 if (URI) {
5546 xmlURIPtr uri;
5547
5548 uri = xmlParseURI((const char *) URI);
5549 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005550 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5551 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005552 /*
5553 * This really ought to be a well formedness error
5554 * but the XML Core WG decided otherwise c.f. issue
5555 * E26 of the XML erratas.
5556 */
Owen Taylor3473f882001-02-23 17:55:21 +00005557 } else {
5558 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005559 /*
5560 * Okay this is foolish to block those but not
5561 * invalid URIs.
5562 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005563 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 } else {
5565 if ((ctxt->sax != NULL) &&
5566 (!ctxt->disableSAX) &&
5567 (ctxt->sax->entityDecl != NULL))
5568 ctxt->sax->entityDecl(ctxt->userData, name,
5569 XML_EXTERNAL_PARAMETER_ENTITY,
5570 literal, URI, NULL);
5571 }
5572 xmlFreeURI(uri);
5573 }
5574 }
5575 }
5576 } else {
5577 if ((RAW == '"') || (RAW == '\'')) {
5578 value = xmlParseEntityValue(ctxt, &orig);
5579 if ((ctxt->sax != NULL) &&
5580 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5581 ctxt->sax->entityDecl(ctxt->userData, name,
5582 XML_INTERNAL_GENERAL_ENTITY,
5583 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005584 /*
5585 * For expat compatibility in SAX mode.
5586 */
5587 if ((ctxt->myDoc == NULL) ||
5588 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5589 if (ctxt->myDoc == NULL) {
5590 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005591 if (ctxt->myDoc == NULL) {
5592 xmlErrMemory(ctxt, "New Doc failed");
5593 return;
5594 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005595 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005596 }
5597 if (ctxt->myDoc->intSubset == NULL)
5598 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5599 BAD_CAST "fake", NULL, NULL);
5600
Daniel Veillard1af9a412003-08-20 22:54:39 +00005601 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5602 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005603 }
Owen Taylor3473f882001-02-23 17:55:21 +00005604 } else {
5605 URI = xmlParseExternalID(ctxt, &literal, 1);
5606 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005607 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005608 }
5609 if (URI) {
5610 xmlURIPtr uri;
5611
5612 uri = xmlParseURI((const char *)URI);
5613 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005614 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5615 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005616 /*
5617 * This really ought to be a well formedness error
5618 * but the XML Core WG decided otherwise c.f. issue
5619 * E26 of the XML erratas.
5620 */
Owen Taylor3473f882001-02-23 17:55:21 +00005621 } else {
5622 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005623 /*
5624 * Okay this is foolish to block those but not
5625 * invalid URIs.
5626 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005627 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005628 }
5629 xmlFreeURI(uri);
5630 }
5631 }
William M. Brack76e95df2003-10-18 16:20:14 +00005632 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005633 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005635 }
5636 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005637 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005638 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005639 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005640 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5641 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005642 }
5643 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005644 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005645 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5646 (ctxt->sax->unparsedEntityDecl != NULL))
5647 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5648 literal, URI, ndata);
5649 } else {
5650 if ((ctxt->sax != NULL) &&
5651 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5652 ctxt->sax->entityDecl(ctxt->userData, name,
5653 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5654 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005655 /*
5656 * For expat compatibility in SAX mode.
5657 * assuming the entity repalcement was asked for
5658 */
5659 if ((ctxt->replaceEntities != 0) &&
5660 ((ctxt->myDoc == NULL) ||
5661 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5662 if (ctxt->myDoc == NULL) {
5663 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005664 if (ctxt->myDoc == NULL) {
5665 xmlErrMemory(ctxt, "New Doc failed");
5666 return;
5667 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005668 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005669 }
5670
5671 if (ctxt->myDoc->intSubset == NULL)
5672 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5673 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005674 xmlSAX2EntityDecl(ctxt, name,
5675 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5676 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005677 }
Owen Taylor3473f882001-02-23 17:55:21 +00005678 }
5679 }
5680 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005681 if (ctxt->instate == XML_PARSER_EOF)
5682 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005683 SKIP_BLANKS;
5684 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005685 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005686 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005687 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005688 } else {
5689 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5691 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005692 }
5693 NEXT;
5694 }
5695 if (orig != NULL) {
5696 /*
5697 * Ugly mechanism to save the raw entity value.
5698 */
5699 xmlEntityPtr cur = NULL;
5700
5701 if (isParameter) {
5702 if ((ctxt->sax != NULL) &&
5703 (ctxt->sax->getParameterEntity != NULL))
5704 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5705 } else {
5706 if ((ctxt->sax != NULL) &&
5707 (ctxt->sax->getEntity != NULL))
5708 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005709 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005710 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005711 }
Owen Taylor3473f882001-02-23 17:55:21 +00005712 }
5713 if (cur != NULL) {
5714 if (cur->orig != NULL)
5715 xmlFree(orig);
5716 else
5717 cur->orig = orig;
5718 } else
5719 xmlFree(orig);
5720 }
Owen Taylor3473f882001-02-23 17:55:21 +00005721 if (value != NULL) xmlFree(value);
5722 if (URI != NULL) xmlFree(URI);
5723 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005724 }
5725}
5726
5727/**
5728 * xmlParseDefaultDecl:
5729 * @ctxt: an XML parser context
5730 * @value: Receive a possible fixed default value for the attribute
5731 *
5732 * Parse an attribute default declaration
5733 *
5734 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5735 *
5736 * [ VC: Required Attribute ]
5737 * if the default declaration is the keyword #REQUIRED, then the
5738 * attribute must be specified for all elements of the type in the
5739 * attribute-list declaration.
5740 *
5741 * [ VC: Attribute Default Legal ]
5742 * The declared default value must meet the lexical constraints of
5743 * the declared attribute type c.f. xmlValidateAttributeDecl()
5744 *
5745 * [ VC: Fixed Attribute Default ]
5746 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005747 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005748 *
5749 * [ WFC: No < in Attribute Values ]
5750 * handled in xmlParseAttValue()
5751 *
5752 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005753 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005754 */
5755
5756int
5757xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5758 int val;
5759 xmlChar *ret;
5760
5761 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005762 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005763 SKIP(9);
5764 return(XML_ATTRIBUTE_REQUIRED);
5765 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005766 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005767 SKIP(8);
5768 return(XML_ATTRIBUTE_IMPLIED);
5769 }
5770 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005771 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005772 SKIP(6);
5773 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005774 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005775 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5776 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005777 }
5778 SKIP_BLANKS;
5779 }
5780 ret = xmlParseAttValue(ctxt);
5781 ctxt->instate = XML_PARSER_DTD;
5782 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005783 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005784 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005785 } else
5786 *value = ret;
5787 return(val);
5788}
5789
5790/**
5791 * xmlParseNotationType:
5792 * @ctxt: an XML parser context
5793 *
5794 * parse an Notation attribute type.
5795 *
5796 * Note: the leading 'NOTATION' S part has already being parsed...
5797 *
5798 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5799 *
5800 * [ VC: Notation Attributes ]
5801 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005802 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005803 *
5804 * Returns: the notation attribute tree built while parsing
5805 */
5806
5807xmlEnumerationPtr
5808xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005809 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005810 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005811
5812 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005813 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005814 return(NULL);
5815 }
5816 SHRINK;
5817 do {
5818 NEXT;
5819 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005820 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005821 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005822 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5823 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005824 xmlFreeEnumeration(ret);
5825 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005826 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005827 tmp = ret;
5828 while (tmp != NULL) {
5829 if (xmlStrEqual(name, tmp->name)) {
5830 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5831 "standalone: attribute notation value token %s duplicated\n",
5832 name, NULL);
5833 if (!xmlDictOwns(ctxt->dict, name))
5834 xmlFree((xmlChar *) name);
5835 break;
5836 }
5837 tmp = tmp->next;
5838 }
5839 if (tmp == NULL) {
5840 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005841 if (cur == NULL) {
5842 xmlFreeEnumeration(ret);
5843 return(NULL);
5844 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005845 if (last == NULL) ret = last = cur;
5846 else {
5847 last->next = cur;
5848 last = cur;
5849 }
Owen Taylor3473f882001-02-23 17:55:21 +00005850 }
5851 SKIP_BLANKS;
5852 } while (RAW == '|');
5853 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005854 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005855 xmlFreeEnumeration(ret);
5856 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005857 }
5858 NEXT;
5859 return(ret);
5860}
5861
5862/**
5863 * xmlParseEnumerationType:
5864 * @ctxt: an XML parser context
5865 *
5866 * parse an Enumeration attribute type.
5867 *
5868 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5869 *
5870 * [ VC: Enumeration ]
5871 * Values of this type must match one of the Nmtoken tokens in
5872 * the declaration
5873 *
5874 * Returns: the enumeration attribute tree built while parsing
5875 */
5876
5877xmlEnumerationPtr
5878xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5879 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005880 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005881
5882 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005883 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005884 return(NULL);
5885 }
5886 SHRINK;
5887 do {
5888 NEXT;
5889 SKIP_BLANKS;
5890 name = xmlParseNmtoken(ctxt);
5891 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005892 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005893 return(ret);
5894 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005895 tmp = ret;
5896 while (tmp != NULL) {
5897 if (xmlStrEqual(name, tmp->name)) {
5898 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5899 "standalone: attribute enumeration value token %s duplicated\n",
5900 name, NULL);
5901 if (!xmlDictOwns(ctxt->dict, name))
5902 xmlFree(name);
5903 break;
5904 }
5905 tmp = tmp->next;
5906 }
5907 if (tmp == NULL) {
5908 cur = xmlCreateEnumeration(name);
5909 if (!xmlDictOwns(ctxt->dict, name))
5910 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005911 if (cur == NULL) {
5912 xmlFreeEnumeration(ret);
5913 return(NULL);
5914 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005915 if (last == NULL) ret = last = cur;
5916 else {
5917 last->next = cur;
5918 last = cur;
5919 }
Owen Taylor3473f882001-02-23 17:55:21 +00005920 }
5921 SKIP_BLANKS;
5922 } while (RAW == '|');
5923 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005924 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 return(ret);
5926 }
5927 NEXT;
5928 return(ret);
5929}
5930
5931/**
5932 * xmlParseEnumeratedType:
5933 * @ctxt: an XML parser context
5934 * @tree: the enumeration tree built while parsing
5935 *
5936 * parse an Enumerated attribute type.
5937 *
5938 * [57] EnumeratedType ::= NotationType | Enumeration
5939 *
5940 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5941 *
5942 *
5943 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5944 */
5945
5946int
5947xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005948 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005949 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005950 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5952 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005953 return(0);
5954 }
5955 SKIP_BLANKS;
5956 *tree = xmlParseNotationType(ctxt);
5957 if (*tree == NULL) return(0);
5958 return(XML_ATTRIBUTE_NOTATION);
5959 }
5960 *tree = xmlParseEnumerationType(ctxt);
5961 if (*tree == NULL) return(0);
5962 return(XML_ATTRIBUTE_ENUMERATION);
5963}
5964
5965/**
5966 * xmlParseAttributeType:
5967 * @ctxt: an XML parser context
5968 * @tree: the enumeration tree built while parsing
5969 *
5970 * parse the Attribute list def for an element
5971 *
5972 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5973 *
5974 * [55] StringType ::= 'CDATA'
5975 *
5976 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5977 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5978 *
5979 * Validity constraints for attribute values syntax are checked in
5980 * xmlValidateAttributeValue()
5981 *
5982 * [ VC: ID ]
5983 * Values of type ID must match the Name production. A name must not
5984 * appear more than once in an XML document as a value of this type;
5985 * i.e., ID values must uniquely identify the elements which bear them.
5986 *
5987 * [ VC: One ID per Element Type ]
5988 * No element type may have more than one ID attribute specified.
5989 *
5990 * [ VC: ID Attribute Default ]
5991 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5992 *
5993 * [ VC: IDREF ]
5994 * Values of type IDREF must match the Name production, and values
5995 * of type IDREFS must match Names; each IDREF Name must match the value
5996 * of an ID attribute on some element in the XML document; i.e. IDREF
5997 * values must match the value of some ID attribute.
5998 *
5999 * [ VC: Entity Name ]
6000 * Values of type ENTITY must match the Name production, values
6001 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006002 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00006003 *
6004 * [ VC: Name Token ]
6005 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006006 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00006007 *
6008 * Returns the attribute type
6009 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006010int
Owen Taylor3473f882001-02-23 17:55:21 +00006011xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6012 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006013 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006014 SKIP(5);
6015 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006016 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006017 SKIP(6);
6018 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006019 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006020 SKIP(5);
6021 return(XML_ATTRIBUTE_IDREF);
6022 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6023 SKIP(2);
6024 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006025 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006026 SKIP(6);
6027 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006028 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006029 SKIP(8);
6030 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006031 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006032 SKIP(8);
6033 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006034 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006035 SKIP(7);
6036 return(XML_ATTRIBUTE_NMTOKEN);
6037 }
6038 return(xmlParseEnumeratedType(ctxt, tree));
6039}
6040
6041/**
6042 * xmlParseAttributeListDecl:
6043 * @ctxt: an XML parser context
6044 *
6045 * : parse the Attribute list def for an element
6046 *
6047 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6048 *
6049 * [53] AttDef ::= S Name S AttType S DefaultDecl
6050 *
6051 */
6052void
6053xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006054 const xmlChar *elemName;
6055 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00006056 xmlEnumerationPtr tree;
6057
Daniel Veillarda07050d2003-10-19 14:46:32 +00006058 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006059 xmlParserInputPtr input = ctxt->input;
6060
6061 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006062 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006063 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006064 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006065 }
6066 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006067 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006068 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006069 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6070 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006071 return;
6072 }
6073 SKIP_BLANKS;
6074 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006075 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006076 const xmlChar *check = CUR_PTR;
6077 int type;
6078 int def;
6079 xmlChar *defaultValue = NULL;
6080
6081 GROW;
6082 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006083 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006084 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006085 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6086 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006087 break;
6088 }
6089 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006090 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006091 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006092 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006093 break;
6094 }
6095 SKIP_BLANKS;
6096
6097 type = xmlParseAttributeType(ctxt, &tree);
6098 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006099 break;
6100 }
6101
6102 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006103 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006104 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6105 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006106 if (tree != NULL)
6107 xmlFreeEnumeration(tree);
6108 break;
6109 }
6110 SKIP_BLANKS;
6111
6112 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6113 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006114 if (defaultValue != NULL)
6115 xmlFree(defaultValue);
6116 if (tree != NULL)
6117 xmlFreeEnumeration(tree);
6118 break;
6119 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006120 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6121 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006122
6123 GROW;
6124 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006125 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006126 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006127 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006128 if (defaultValue != NULL)
6129 xmlFree(defaultValue);
6130 if (tree != NULL)
6131 xmlFreeEnumeration(tree);
6132 break;
6133 }
6134 SKIP_BLANKS;
6135 }
6136 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006137 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6138 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006139 if (defaultValue != NULL)
6140 xmlFree(defaultValue);
6141 if (tree != NULL)
6142 xmlFreeEnumeration(tree);
6143 break;
6144 }
6145 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6146 (ctxt->sax->attributeDecl != NULL))
6147 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6148 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006149 else if (tree != NULL)
6150 xmlFreeEnumeration(tree);
6151
6152 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006153 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006154 (def != XML_ATTRIBUTE_REQUIRED)) {
6155 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6156 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006157 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006158 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6159 }
Owen Taylor3473f882001-02-23 17:55:21 +00006160 if (defaultValue != NULL)
6161 xmlFree(defaultValue);
6162 GROW;
6163 }
6164 if (RAW == '>') {
6165 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006166 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6167 "Attribute list declaration doesn't start and stop in the same entity\n",
6168 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006169 }
6170 NEXT;
6171 }
Owen Taylor3473f882001-02-23 17:55:21 +00006172 }
6173}
6174
6175/**
6176 * xmlParseElementMixedContentDecl:
6177 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006178 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006179 *
6180 * parse the declaration for a Mixed Element content
6181 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006182 *
Owen Taylor3473f882001-02-23 17:55:21 +00006183 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6184 * '(' S? '#PCDATA' S? ')'
6185 *
6186 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6187 *
6188 * [ VC: No Duplicate Types ]
6189 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006190 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006191 *
6192 * returns: the list of the xmlElementContentPtr describing the element choices
6193 */
6194xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006195xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006196 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006197 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006198
6199 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006200 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006201 SKIP(7);
6202 SKIP_BLANKS;
6203 SHRINK;
6204 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006205 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006206 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6207"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006208 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006209 }
Owen Taylor3473f882001-02-23 17:55:21 +00006210 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006211 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006212 if (ret == NULL)
6213 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006214 if (RAW == '*') {
6215 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6216 NEXT;
6217 }
6218 return(ret);
6219 }
6220 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006221 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006222 if (ret == NULL) return(NULL);
6223 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006224 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006225 NEXT;
6226 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006227 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006228 if (ret == NULL) return(NULL);
6229 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006230 if (cur != NULL)
6231 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006232 cur = ret;
6233 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006234 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006235 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006236 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006237 if (n->c1 != NULL)
6238 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006239 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006240 if (n != NULL)
6241 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006242 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006243 }
6244 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006245 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006246 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006247 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006248 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006249 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006250 return(NULL);
6251 }
6252 SKIP_BLANKS;
6253 GROW;
6254 }
6255 if ((RAW == ')') && (NXT(1) == '*')) {
6256 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006257 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006258 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006259 if (cur->c2 != NULL)
6260 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006261 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006262 if (ret != NULL)
6263 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006264 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006265 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6266"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006267 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006268 }
Owen Taylor3473f882001-02-23 17:55:21 +00006269 SKIP(2);
6270 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006271 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006272 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006273 return(NULL);
6274 }
6275
6276 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006277 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006278 }
6279 return(ret);
6280}
6281
6282/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006283 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006284 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006285 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006286 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006287 *
6288 * parse the declaration for a Mixed Element content
6289 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006290 *
Owen Taylor3473f882001-02-23 17:55:21 +00006291 *
6292 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6293 *
6294 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6295 *
6296 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6297 *
6298 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6299 *
6300 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6301 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006302 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006303 * opening or closing parentheses in a choice, seq, or Mixed
6304 * construct is contained in the replacement text for a parameter
6305 * entity, both must be contained in the same replacement text. For
6306 * interoperability, if a parameter-entity reference appears in a
6307 * choice, seq, or Mixed construct, its replacement text should not
6308 * be empty, and neither the first nor last non-blank character of
6309 * the replacement text should be a connector (| or ,).
6310 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006311 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006312 * hierarchy.
6313 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006314static xmlElementContentPtr
6315xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6316 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006317 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006318 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006319 xmlChar type = 0;
6320
Daniel Veillard489f9672009-08-10 16:49:30 +02006321 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6322 (depth > 2048)) {
6323 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6324"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6325 depth);
6326 return(NULL);
6327 }
Owen Taylor3473f882001-02-23 17:55:21 +00006328 SKIP_BLANKS;
6329 GROW;
6330 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006331 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006332
Owen Taylor3473f882001-02-23 17:55:21 +00006333 /* Recurse on first child */
6334 NEXT;
6335 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006336 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6337 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006338 SKIP_BLANKS;
6339 GROW;
6340 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006341 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006342 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006343 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006344 return(NULL);
6345 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006346 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006347 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006348 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006349 return(NULL);
6350 }
Owen Taylor3473f882001-02-23 17:55:21 +00006351 GROW;
6352 if (RAW == '?') {
6353 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6354 NEXT;
6355 } else if (RAW == '*') {
6356 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6357 NEXT;
6358 } else if (RAW == '+') {
6359 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6360 NEXT;
6361 } else {
6362 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6363 }
Owen Taylor3473f882001-02-23 17:55:21 +00006364 GROW;
6365 }
6366 SKIP_BLANKS;
6367 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006368 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006369 /*
6370 * Each loop we parse one separator and one element.
6371 */
6372 if (RAW == ',') {
6373 if (type == 0) type = CUR;
6374
6375 /*
6376 * Detect "Name | Name , Name" error
6377 */
6378 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006379 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006380 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006381 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006382 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006383 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006384 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006385 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006386 return(NULL);
6387 }
6388 NEXT;
6389
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006390 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006391 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006392 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006393 xmlFreeDocElementContent(ctxt->myDoc, last);
6394 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006395 return(NULL);
6396 }
6397 if (last == NULL) {
6398 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006399 if (ret != NULL)
6400 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006401 ret = cur = op;
6402 } else {
6403 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006404 if (op != NULL)
6405 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006406 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006407 if (last != NULL)
6408 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006409 cur =op;
6410 last = NULL;
6411 }
6412 } else if (RAW == '|') {
6413 if (type == 0) type = CUR;
6414
6415 /*
6416 * Detect "Name , Name | Name" error
6417 */
6418 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006419 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006420 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006421 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006422 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006423 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006424 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006425 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006426 return(NULL);
6427 }
6428 NEXT;
6429
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006430 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006431 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006432 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006433 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006434 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006435 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006436 return(NULL);
6437 }
6438 if (last == NULL) {
6439 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006440 if (ret != NULL)
6441 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006442 ret = cur = op;
6443 } else {
6444 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006445 if (op != NULL)
6446 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006447 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006448 if (last != NULL)
6449 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006450 cur =op;
6451 last = NULL;
6452 }
6453 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006454 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006455 if ((last != NULL) && (last != ret))
6456 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006457 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006458 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006459 return(NULL);
6460 }
6461 GROW;
6462 SKIP_BLANKS;
6463 GROW;
6464 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006465 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006466 /* Recurse on second child */
6467 NEXT;
6468 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006469 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6470 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006471 SKIP_BLANKS;
6472 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006473 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006474 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006475 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006476 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006477 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006478 return(NULL);
6479 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006480 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006481 if (last == NULL) {
6482 if (ret != NULL)
6483 xmlFreeDocElementContent(ctxt->myDoc, ret);
6484 return(NULL);
6485 }
Owen Taylor3473f882001-02-23 17:55:21 +00006486 if (RAW == '?') {
6487 last->ocur = XML_ELEMENT_CONTENT_OPT;
6488 NEXT;
6489 } else if (RAW == '*') {
6490 last->ocur = XML_ELEMENT_CONTENT_MULT;
6491 NEXT;
6492 } else if (RAW == '+') {
6493 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6494 NEXT;
6495 } else {
6496 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6497 }
6498 }
6499 SKIP_BLANKS;
6500 GROW;
6501 }
6502 if ((cur != NULL) && (last != NULL)) {
6503 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006504 if (last != NULL)
6505 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006506 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006507 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006508 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6509"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006510 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006511 }
Owen Taylor3473f882001-02-23 17:55:21 +00006512 NEXT;
6513 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006514 if (ret != NULL) {
6515 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6516 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6517 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6518 else
6519 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6520 }
Owen Taylor3473f882001-02-23 17:55:21 +00006521 NEXT;
6522 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006523 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006524 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006525 cur = ret;
6526 /*
6527 * Some normalization:
6528 * (a | b* | c?)* == (a | b | c)*
6529 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006530 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006531 if ((cur->c1 != NULL) &&
6532 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6533 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6534 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6535 if ((cur->c2 != NULL) &&
6536 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6537 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6538 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6539 cur = cur->c2;
6540 }
6541 }
Owen Taylor3473f882001-02-23 17:55:21 +00006542 NEXT;
6543 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006544 if (ret != NULL) {
6545 int found = 0;
6546
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006547 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6549 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006550 else
6551 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006552 /*
6553 * Some normalization:
6554 * (a | b*)+ == (a | b)*
6555 * (a | b?)+ == (a | b)*
6556 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006557 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006558 if ((cur->c1 != NULL) &&
6559 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6560 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6561 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6562 found = 1;
6563 }
6564 if ((cur->c2 != NULL) &&
6565 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6566 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6567 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6568 found = 1;
6569 }
6570 cur = cur->c2;
6571 }
6572 if (found)
6573 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6574 }
Owen Taylor3473f882001-02-23 17:55:21 +00006575 NEXT;
6576 }
6577 return(ret);
6578}
6579
6580/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006581 * xmlParseElementChildrenContentDecl:
6582 * @ctxt: an XML parser context
6583 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006584 *
6585 * parse the declaration for a Mixed Element content
6586 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6587 *
6588 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6589 *
6590 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6591 *
6592 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6593 *
6594 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6595 *
6596 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6597 * TODO Parameter-entity replacement text must be properly nested
6598 * with parenthesized groups. That is to say, if either of the
6599 * opening or closing parentheses in a choice, seq, or Mixed
6600 * construct is contained in the replacement text for a parameter
6601 * entity, both must be contained in the same replacement text. For
6602 * interoperability, if a parameter-entity reference appears in a
6603 * choice, seq, or Mixed construct, its replacement text should not
6604 * be empty, and neither the first nor last non-blank character of
6605 * the replacement text should be a connector (| or ,).
6606 *
6607 * Returns the tree of xmlElementContentPtr describing the element
6608 * hierarchy.
6609 */
6610xmlElementContentPtr
6611xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6612 /* stub left for API/ABI compat */
6613 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6614}
6615
6616/**
Owen Taylor3473f882001-02-23 17:55:21 +00006617 * xmlParseElementContentDecl:
6618 * @ctxt: an XML parser context
6619 * @name: the name of the element being defined.
6620 * @result: the Element Content pointer will be stored here if any
6621 *
6622 * parse the declaration for an Element content either Mixed or Children,
6623 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006624 *
Owen Taylor3473f882001-02-23 17:55:21 +00006625 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6626 *
6627 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6628 */
6629
6630int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006631xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006632 xmlElementContentPtr *result) {
6633
6634 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006635 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006636 int res;
6637
6638 *result = NULL;
6639
6640 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006641 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006642 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006643 return(-1);
6644 }
6645 NEXT;
6646 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006647 if (ctxt->instate == XML_PARSER_EOF)
6648 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006649 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006650 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006651 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006652 res = XML_ELEMENT_TYPE_MIXED;
6653 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006654 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006655 res = XML_ELEMENT_TYPE_ELEMENT;
6656 }
Owen Taylor3473f882001-02-23 17:55:21 +00006657 SKIP_BLANKS;
6658 *result = tree;
6659 return(res);
6660}
6661
6662/**
6663 * xmlParseElementDecl:
6664 * @ctxt: an XML parser context
6665 *
6666 * parse an Element declaration.
6667 *
6668 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6669 *
6670 * [ VC: Unique Element Type Declaration ]
6671 * No element type may be declared more than once
6672 *
6673 * Returns the type of the element, or -1 in case of error
6674 */
6675int
6676xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006677 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006678 int ret = -1;
6679 xmlElementContentPtr content = NULL;
6680
Daniel Veillard4c778d82005-01-23 17:37:44 +00006681 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006682 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006683 xmlParserInputPtr input = ctxt->input;
6684
6685 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006686 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006687 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6688 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006689 }
6690 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006691 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006692 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006693 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6694 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006695 return(-1);
6696 }
6697 while ((RAW == 0) && (ctxt->inputNr > 1))
6698 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006699 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6701 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006702 }
6703 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006704 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006705 SKIP(5);
6706 /*
6707 * Element must always be empty.
6708 */
6709 ret = XML_ELEMENT_TYPE_EMPTY;
6710 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6711 (NXT(2) == 'Y')) {
6712 SKIP(3);
6713 /*
6714 * Element is a generic container.
6715 */
6716 ret = XML_ELEMENT_TYPE_ANY;
6717 } else if (RAW == '(') {
6718 ret = xmlParseElementContentDecl(ctxt, name, &content);
6719 } else {
6720 /*
6721 * [ WFC: PEs in Internal Subset ] error handling.
6722 */
6723 if ((RAW == '%') && (ctxt->external == 0) &&
6724 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006725 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006726 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006727 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006728 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006729 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6730 }
Owen Taylor3473f882001-02-23 17:55:21 +00006731 return(-1);
6732 }
6733
6734 SKIP_BLANKS;
6735 /*
6736 * Pop-up of finished entities.
6737 */
6738 while ((RAW == 0) && (ctxt->inputNr > 1))
6739 xmlPopInput(ctxt);
6740 SKIP_BLANKS;
6741
6742 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006743 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006744 if (content != NULL) {
6745 xmlFreeDocElementContent(ctxt->myDoc, content);
6746 }
Owen Taylor3473f882001-02-23 17:55:21 +00006747 } else {
6748 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006749 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006751 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006752
Owen Taylor3473f882001-02-23 17:55:21 +00006753 NEXT;
6754 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006755 (ctxt->sax->elementDecl != NULL)) {
6756 if (content != NULL)
6757 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006758 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6759 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006760 if ((content != NULL) && (content->parent == NULL)) {
6761 /*
6762 * this is a trick: if xmlAddElementDecl is called,
6763 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006764 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006765 * interfaces or change the API/ABI
6766 */
6767 xmlFreeDocElementContent(ctxt->myDoc, content);
6768 }
6769 } else if (content != NULL) {
6770 xmlFreeDocElementContent(ctxt->myDoc, content);
6771 }
Owen Taylor3473f882001-02-23 17:55:21 +00006772 }
Owen Taylor3473f882001-02-23 17:55:21 +00006773 }
6774 return(ret);
6775}
6776
6777/**
Owen Taylor3473f882001-02-23 17:55:21 +00006778 * xmlParseConditionalSections
6779 * @ctxt: an XML parser context
6780 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006781 * [61] conditionalSect ::= includeSect | ignoreSect
6782 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006783 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6784 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6785 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6786 */
6787
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006788static void
Owen Taylor3473f882001-02-23 17:55:21 +00006789xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006790 int id = ctxt->input->id;
6791
Owen Taylor3473f882001-02-23 17:55:21 +00006792 SKIP(3);
6793 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006794 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006795 SKIP(7);
6796 SKIP_BLANKS;
6797 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006798 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006799 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006800 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006801 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006802 if (ctxt->input->id != id) {
6803 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6804 "All markup of the conditional section is not in the same entity\n",
6805 NULL, NULL);
6806 }
Owen Taylor3473f882001-02-23 17:55:21 +00006807 NEXT;
6808 }
6809 if (xmlParserDebugEntities) {
6810 if ((ctxt->input != NULL) && (ctxt->input->filename))
6811 xmlGenericError(xmlGenericErrorContext,
6812 "%s(%d): ", ctxt->input->filename,
6813 ctxt->input->line);
6814 xmlGenericError(xmlGenericErrorContext,
6815 "Entering INCLUDE Conditional Section\n");
6816 }
6817
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006818 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6819 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006820 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006821 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006822
6823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6824 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006825 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006826 NEXT;
6827 } else if (RAW == '%') {
6828 xmlParsePEReference(ctxt);
6829 } else
6830 xmlParseMarkupDecl(ctxt);
6831
6832 /*
6833 * Pop-up of finished entities.
6834 */
6835 while ((RAW == 0) && (ctxt->inputNr > 1))
6836 xmlPopInput(ctxt);
6837
Daniel Veillardfdc91562002-07-01 21:52:03 +00006838 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006839 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006840 break;
6841 }
6842 }
6843 if (xmlParserDebugEntities) {
6844 if ((ctxt->input != NULL) && (ctxt->input->filename))
6845 xmlGenericError(xmlGenericErrorContext,
6846 "%s(%d): ", ctxt->input->filename,
6847 ctxt->input->line);
6848 xmlGenericError(xmlGenericErrorContext,
6849 "Leaving INCLUDE Conditional Section\n");
6850 }
6851
Daniel Veillarda07050d2003-10-19 14:46:32 +00006852 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006853 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006854 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006855 int depth = 0;
6856
6857 SKIP(6);
6858 SKIP_BLANKS;
6859 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006860 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006861 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006862 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006863 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006864 if (ctxt->input->id != id) {
6865 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6866 "All markup of the conditional section is not in the same entity\n",
6867 NULL, NULL);
6868 }
Owen Taylor3473f882001-02-23 17:55:21 +00006869 NEXT;
6870 }
6871 if (xmlParserDebugEntities) {
6872 if ((ctxt->input != NULL) && (ctxt->input->filename))
6873 xmlGenericError(xmlGenericErrorContext,
6874 "%s(%d): ", ctxt->input->filename,
6875 ctxt->input->line);
6876 xmlGenericError(xmlGenericErrorContext,
6877 "Entering IGNORE Conditional Section\n");
6878 }
6879
6880 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006881 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006882 * But disable SAX event generating DTD building in the meantime
6883 */
6884 state = ctxt->disableSAX;
6885 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006886 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006887 ctxt->instate = XML_PARSER_IGNORE;
6888
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006889 while (((depth >= 0) && (RAW != 0)) &&
6890 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006891 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6892 depth++;
6893 SKIP(3);
6894 continue;
6895 }
6896 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6897 if (--depth >= 0) SKIP(3);
6898 continue;
6899 }
6900 NEXT;
6901 continue;
6902 }
6903
6904 ctxt->disableSAX = state;
6905 ctxt->instate = instate;
6906
6907 if (xmlParserDebugEntities) {
6908 if ((ctxt->input != NULL) && (ctxt->input->filename))
6909 xmlGenericError(xmlGenericErrorContext,
6910 "%s(%d): ", ctxt->input->filename,
6911 ctxt->input->line);
6912 xmlGenericError(xmlGenericErrorContext,
6913 "Leaving IGNORE Conditional Section\n");
6914 }
6915
6916 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006917 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006918 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006919 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006920 }
6921
6922 if (RAW == 0)
6923 SHRINK;
6924
6925 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006926 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006927 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006928 if (ctxt->input->id != id) {
6929 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6930 "All markup of the conditional section is not in the same entity\n",
6931 NULL, NULL);
6932 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006933 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006934 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006935 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006936 }
6937}
6938
6939/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006940 * xmlParseMarkupDecl:
6941 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006942 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006943 * parse Markup declarations
6944 *
6945 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6946 * NotationDecl | PI | Comment
6947 *
6948 * [ VC: Proper Declaration/PE Nesting ]
6949 * Parameter-entity replacement text must be properly nested with
6950 * markup declarations. That is to say, if either the first character
6951 * or the last character of a markup declaration (markupdecl above) is
6952 * contained in the replacement text for a parameter-entity reference,
6953 * both must be contained in the same replacement text.
6954 *
6955 * [ WFC: PEs in Internal Subset ]
6956 * In the internal DTD subset, parameter-entity references can occur
6957 * only where markup declarations can occur, not within markup declarations.
6958 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006959 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006960 */
6961void
6962xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6963 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006964 if (CUR == '<') {
6965 if (NXT(1) == '!') {
6966 switch (NXT(2)) {
6967 case 'E':
6968 if (NXT(3) == 'L')
6969 xmlParseElementDecl(ctxt);
6970 else if (NXT(3) == 'N')
6971 xmlParseEntityDecl(ctxt);
6972 break;
6973 case 'A':
6974 xmlParseAttributeListDecl(ctxt);
6975 break;
6976 case 'N':
6977 xmlParseNotationDecl(ctxt);
6978 break;
6979 case '-':
6980 xmlParseComment(ctxt);
6981 break;
6982 default:
6983 /* there is an error but it will be detected later */
6984 break;
6985 }
6986 } else if (NXT(1) == '?') {
6987 xmlParsePI(ctxt);
6988 }
6989 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006990
6991 /*
6992 * detect requirement to exit there and act accordingly
6993 * and avoid having instate overriden later on
6994 */
6995 if (ctxt->instate == XML_PARSER_EOF)
6996 return;
6997
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006998 /*
6999 * This is only for internal subset. On external entities,
7000 * the replacement is done before parsing stage
7001 */
7002 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7003 xmlParsePEReference(ctxt);
7004
7005 /*
7006 * Conditional sections are allowed from entities included
7007 * by PE References in the internal subset.
7008 */
7009 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7010 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7011 xmlParseConditionalSections(ctxt);
7012 }
7013 }
7014
7015 ctxt->instate = XML_PARSER_DTD;
7016}
7017
7018/**
7019 * xmlParseTextDecl:
7020 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00007021 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007022 * parse an XML declaration header for external entities
7023 *
7024 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007025 */
7026
7027void
7028xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7029 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007030 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007031
7032 /*
7033 * We know that '<?xml' is here.
7034 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007035 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007036 SKIP(5);
7037 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007038 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007039 return;
7040 }
7041
William M. Brack76e95df2003-10-18 16:20:14 +00007042 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007043 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7044 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007045 }
7046 SKIP_BLANKS;
7047
7048 /*
7049 * We may have the VersionInfo here.
7050 */
7051 version = xmlParseVersionInfo(ctxt);
7052 if (version == NULL)
7053 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00007054 else {
William M. Brack76e95df2003-10-18 16:20:14 +00007055 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007056 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7057 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00007058 }
7059 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007060 ctxt->input->version = version;
7061
7062 /*
7063 * We must have the encoding declaration
7064 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007065 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007066 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7067 /*
7068 * The XML REC instructs us to stop parsing right here
7069 */
7070 return;
7071 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007072 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7073 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7074 "Missing encoding in text declaration\n");
7075 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007076
7077 SKIP_BLANKS;
7078 if ((RAW == '?') && (NXT(1) == '>')) {
7079 SKIP(2);
7080 } else if (RAW == '>') {
7081 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007082 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007083 NEXT;
7084 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007085 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007086 MOVETO_ENDTAG(CUR_PTR);
7087 NEXT;
7088 }
7089}
7090
7091/**
Owen Taylor3473f882001-02-23 17:55:21 +00007092 * xmlParseExternalSubset:
7093 * @ctxt: an XML parser context
7094 * @ExternalID: the external identifier
7095 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007096 *
Owen Taylor3473f882001-02-23 17:55:21 +00007097 * parse Markup declarations from an external subset
7098 *
7099 * [30] extSubset ::= textDecl? extSubsetDecl
7100 *
7101 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7102 */
7103void
7104xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7105 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007106 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007107 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007108
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007109 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007110 (ctxt->input->end - ctxt->input->cur >= 4)) {
7111 xmlChar start[4];
7112 xmlCharEncoding enc;
7113
7114 start[0] = RAW;
7115 start[1] = NXT(1);
7116 start[2] = NXT(2);
7117 start[3] = NXT(3);
7118 enc = xmlDetectCharEncoding(start, 4);
7119 if (enc != XML_CHAR_ENCODING_NONE)
7120 xmlSwitchEncoding(ctxt, enc);
7121 }
7122
Daniel Veillarda07050d2003-10-19 14:46:32 +00007123 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007124 xmlParseTextDecl(ctxt);
7125 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7126 /*
7127 * The XML REC instructs us to stop parsing right here
7128 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007129 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007130 return;
7131 }
7132 }
7133 if (ctxt->myDoc == NULL) {
7134 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007135 if (ctxt->myDoc == NULL) {
7136 xmlErrMemory(ctxt, "New Doc failed");
7137 return;
7138 }
7139 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007140 }
7141 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7142 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7143
7144 ctxt->instate = XML_PARSER_DTD;
7145 ctxt->external = 1;
7146 while (((RAW == '<') && (NXT(1) == '?')) ||
7147 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007148 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007149 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007150 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007151
7152 GROW;
7153 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7154 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007155 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007156 NEXT;
7157 } else if (RAW == '%') {
7158 xmlParsePEReference(ctxt);
7159 } else
7160 xmlParseMarkupDecl(ctxt);
7161
7162 /*
7163 * Pop-up of finished entities.
7164 */
7165 while ((RAW == 0) && (ctxt->inputNr > 1))
7166 xmlPopInput(ctxt);
7167
Daniel Veillardfdc91562002-07-01 21:52:03 +00007168 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007169 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007170 break;
7171 }
7172 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007173
Owen Taylor3473f882001-02-23 17:55:21 +00007174 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007175 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007176 }
7177
7178}
7179
7180/**
7181 * xmlParseReference:
7182 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007183 *
Owen Taylor3473f882001-02-23 17:55:21 +00007184 * parse and handle entity references in content, depending on the SAX
7185 * interface, this may end-up in a call to character() if this is a
7186 * CharRef, a predefined entity, if there is no reference() callback.
7187 * or if the parser was asked to switch to that mode.
7188 *
7189 * [67] Reference ::= EntityRef | CharRef
7190 */
7191void
7192xmlParseReference(xmlParserCtxtPtr ctxt) {
7193 xmlEntityPtr ent;
7194 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007195 int was_checked;
7196 xmlNodePtr list = NULL;
7197 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007198
Daniel Veillard0161e632008-08-28 15:36:32 +00007199
7200 if (RAW != '&')
7201 return;
7202
7203 /*
7204 * Simple case of a CharRef
7205 */
Owen Taylor3473f882001-02-23 17:55:21 +00007206 if (NXT(1) == '#') {
7207 int i = 0;
7208 xmlChar out[10];
7209 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007210 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007211
Daniel Veillarddc171602008-03-26 17:41:38 +00007212 if (value == 0)
7213 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007214 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7215 /*
7216 * So we are using non-UTF-8 buffers
7217 * Check that the char fit on 8bits, if not
7218 * generate a CharRef.
7219 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007220 if (value <= 0xFF) {
7221 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007222 out[1] = 0;
7223 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7224 (!ctxt->disableSAX))
7225 ctxt->sax->characters(ctxt->userData, out, 1);
7226 } else {
7227 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007228 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007229 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007230 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007231 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7232 (!ctxt->disableSAX))
7233 ctxt->sax->reference(ctxt->userData, out);
7234 }
7235 } else {
7236 /*
7237 * Just encode the value in UTF-8
7238 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007239 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007240 out[i] = 0;
7241 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7242 (!ctxt->disableSAX))
7243 ctxt->sax->characters(ctxt->userData, out, i);
7244 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007245 return;
7246 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007247
Daniel Veillard0161e632008-08-28 15:36:32 +00007248 /*
7249 * We are seeing an entity reference
7250 */
7251 ent = xmlParseEntityRef(ctxt);
7252 if (ent == NULL) return;
7253 if (!ctxt->wellFormed)
7254 return;
7255 was_checked = ent->checked;
7256
7257 /* special case of predefined entities */
7258 if ((ent->name == NULL) ||
7259 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7260 val = ent->content;
7261 if (val == NULL) return;
7262 /*
7263 * inline the entity.
7264 */
7265 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7266 (!ctxt->disableSAX))
7267 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7268 return;
7269 }
7270
7271 /*
7272 * The first reference to the entity trigger a parsing phase
7273 * where the ent->children is filled with the result from
7274 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007275 * Note: external parsed entities will not be loaded, it is not
7276 * required for a non-validating parser, unless the parsing option
7277 * of validating, or substituting entities were given. Doing so is
7278 * far more secure as the parser will only process data coming from
7279 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007280 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007281 if (((ent->checked == 0) ||
7282 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007283 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7284 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007285 unsigned long oldnbent = ctxt->nbentities;
7286
7287 /*
7288 * This is a bit hackish but this seems the best
7289 * way to make sure both SAX and DOM entity support
7290 * behaves okay.
7291 */
7292 void *user_data;
7293 if (ctxt->userData == ctxt)
7294 user_data = NULL;
7295 else
7296 user_data = ctxt->userData;
7297
7298 /*
7299 * Check that this entity is well formed
7300 * 4.3.2: An internal general parsed entity is well-formed
7301 * if its replacement text matches the production labeled
7302 * content.
7303 */
7304 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7305 ctxt->depth++;
7306 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7307 user_data, &list);
7308 ctxt->depth--;
7309
7310 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7311 ctxt->depth++;
7312 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7313 user_data, ctxt->depth, ent->URI,
7314 ent->ExternalID, &list);
7315 ctxt->depth--;
7316 } else {
7317 ret = XML_ERR_ENTITY_PE_INTERNAL;
7318 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7319 "invalid entity type found\n", NULL);
7320 }
7321
7322 /*
7323 * Store the number of entities needing parsing for this entity
7324 * content and do checkings
7325 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007326 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7327 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7328 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007329 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007330 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007331 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007332 return;
7333 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007334 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007335 xmlFreeNodeList(list);
7336 return;
7337 }
Owen Taylor3473f882001-02-23 17:55:21 +00007338
Daniel Veillard0161e632008-08-28 15:36:32 +00007339 if ((ret == XML_ERR_OK) && (list != NULL)) {
7340 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7341 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7342 (ent->children == NULL)) {
7343 ent->children = list;
7344 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007345 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007346 * Prune it directly in the generated document
7347 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007348 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007349 if (((list->type == XML_TEXT_NODE) &&
7350 (list->next == NULL)) ||
7351 (ctxt->parseMode == XML_PARSE_READER)) {
7352 list->parent = (xmlNodePtr) ent;
7353 list = NULL;
7354 ent->owner = 1;
7355 } else {
7356 ent->owner = 0;
7357 while (list != NULL) {
7358 list->parent = (xmlNodePtr) ctxt->node;
7359 list->doc = ctxt->myDoc;
7360 if (list->next == NULL)
7361 ent->last = list;
7362 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007363 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007364 list = ent->children;
7365#ifdef LIBXML_LEGACY_ENABLED
7366 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7367 xmlAddEntityReference(ent, list, NULL);
7368#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007369 }
7370 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007371 ent->owner = 1;
7372 while (list != NULL) {
7373 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007374 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007375 if (list->next == NULL)
7376 ent->last = list;
7377 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007378 }
7379 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007380 } else {
7381 xmlFreeNodeList(list);
7382 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007383 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007384 } else if ((ret != XML_ERR_OK) &&
7385 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7386 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7387 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007388 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007389 } else if (list != NULL) {
7390 xmlFreeNodeList(list);
7391 list = NULL;
7392 }
7393 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007394 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007395 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007396 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007397 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007398
Daniel Veillard0161e632008-08-28 15:36:32 +00007399 /*
7400 * Now that the entity content has been gathered
7401 * provide it to the application, this can take different forms based
7402 * on the parsing modes.
7403 */
7404 if (ent->children == NULL) {
7405 /*
7406 * Probably running in SAX mode and the callbacks don't
7407 * build the entity content. So unless we already went
7408 * though parsing for first checking go though the entity
7409 * content to generate callbacks associated to the entity
7410 */
7411 if (was_checked != 0) {
7412 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007413 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007414 * This is a bit hackish but this seems the best
7415 * way to make sure both SAX and DOM entity support
7416 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007417 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007418 if (ctxt->userData == ctxt)
7419 user_data = NULL;
7420 else
7421 user_data = ctxt->userData;
7422
7423 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7424 ctxt->depth++;
7425 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7426 ent->content, user_data, NULL);
7427 ctxt->depth--;
7428 } else if (ent->etype ==
7429 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7430 ctxt->depth++;
7431 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7432 ctxt->sax, user_data, ctxt->depth,
7433 ent->URI, ent->ExternalID, NULL);
7434 ctxt->depth--;
7435 } else {
7436 ret = XML_ERR_ENTITY_PE_INTERNAL;
7437 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7438 "invalid entity type found\n", NULL);
7439 }
7440 if (ret == XML_ERR_ENTITY_LOOP) {
7441 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7442 return;
7443 }
7444 }
7445 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7446 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7447 /*
7448 * Entity reference callback comes second, it's somewhat
7449 * superfluous but a compatibility to historical behaviour
7450 */
7451 ctxt->sax->reference(ctxt->userData, ent->name);
7452 }
7453 return;
7454 }
7455
7456 /*
7457 * If we didn't get any children for the entity being built
7458 */
7459 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7460 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7461 /*
7462 * Create a node.
7463 */
7464 ctxt->sax->reference(ctxt->userData, ent->name);
7465 return;
7466 }
7467
7468 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7469 /*
7470 * There is a problem on the handling of _private for entities
7471 * (bug 155816): Should we copy the content of the field from
7472 * the entity (possibly overwriting some value set by the user
7473 * when a copy is created), should we leave it alone, or should
7474 * we try to take care of different situations? The problem
7475 * is exacerbated by the usage of this field by the xmlReader.
7476 * To fix this bug, we look at _private on the created node
7477 * and, if it's NULL, we copy in whatever was in the entity.
7478 * If it's not NULL we leave it alone. This is somewhat of a
7479 * hack - maybe we should have further tests to determine
7480 * what to do.
7481 */
7482 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7483 /*
7484 * Seems we are generating the DOM content, do
7485 * a simple tree copy for all references except the first
7486 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007487 */
7488 if (((list == NULL) && (ent->owner == 0)) ||
7489 (ctxt->parseMode == XML_PARSE_READER)) {
7490 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7491
7492 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007493 * We are copying here, make sure there is no abuse
7494 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007495 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007496 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7497 return;
7498
7499 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007500 * when operating on a reader, the entities definitions
7501 * are always owning the entities subtree.
7502 if (ctxt->parseMode == XML_PARSE_READER)
7503 ent->owner = 1;
7504 */
7505
7506 cur = ent->children;
7507 while (cur != NULL) {
7508 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7509 if (nw != NULL) {
7510 if (nw->_private == NULL)
7511 nw->_private = cur->_private;
7512 if (firstChild == NULL){
7513 firstChild = nw;
7514 }
7515 nw = xmlAddChild(ctxt->node, nw);
7516 }
7517 if (cur == ent->last) {
7518 /*
7519 * needed to detect some strange empty
7520 * node cases in the reader tests
7521 */
7522 if ((ctxt->parseMode == XML_PARSE_READER) &&
7523 (nw != NULL) &&
7524 (nw->type == XML_ELEMENT_NODE) &&
7525 (nw->children == NULL))
7526 nw->extra = 1;
7527
7528 break;
7529 }
7530 cur = cur->next;
7531 }
7532#ifdef LIBXML_LEGACY_ENABLED
7533 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7534 xmlAddEntityReference(ent, firstChild, nw);
7535#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007536 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007537 xmlNodePtr nw = NULL, cur, next, last,
7538 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007539
7540 /*
7541 * We are copying here, make sure there is no abuse
7542 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007543 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007544 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7545 return;
7546
Daniel Veillard0161e632008-08-28 15:36:32 +00007547 /*
7548 * Copy the entity child list and make it the new
7549 * entity child list. The goal is to make sure any
7550 * ID or REF referenced will be the one from the
7551 * document content and not the entity copy.
7552 */
7553 cur = ent->children;
7554 ent->children = NULL;
7555 last = ent->last;
7556 ent->last = NULL;
7557 while (cur != NULL) {
7558 next = cur->next;
7559 cur->next = NULL;
7560 cur->parent = NULL;
7561 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7562 if (nw != NULL) {
7563 if (nw->_private == NULL)
7564 nw->_private = cur->_private;
7565 if (firstChild == NULL){
7566 firstChild = cur;
7567 }
7568 xmlAddChild((xmlNodePtr) ent, nw);
7569 xmlAddChild(ctxt->node, cur);
7570 }
7571 if (cur == last)
7572 break;
7573 cur = next;
7574 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007575 if (ent->owner == 0)
7576 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007577#ifdef LIBXML_LEGACY_ENABLED
7578 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7579 xmlAddEntityReference(ent, firstChild, nw);
7580#endif /* LIBXML_LEGACY_ENABLED */
7581 } else {
7582 const xmlChar *nbktext;
7583
7584 /*
7585 * the name change is to avoid coalescing of the
7586 * node with a possible previous text one which
7587 * would make ent->children a dangling pointer
7588 */
7589 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7590 -1);
7591 if (ent->children->type == XML_TEXT_NODE)
7592 ent->children->name = nbktext;
7593 if ((ent->last != ent->children) &&
7594 (ent->last->type == XML_TEXT_NODE))
7595 ent->last->name = nbktext;
7596 xmlAddChildList(ctxt->node, ent->children);
7597 }
7598
7599 /*
7600 * This is to avoid a nasty side effect, see
7601 * characters() in SAX.c
7602 */
7603 ctxt->nodemem = 0;
7604 ctxt->nodelen = 0;
7605 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007606 }
7607 }
7608}
7609
7610/**
7611 * xmlParseEntityRef:
7612 * @ctxt: an XML parser context
7613 *
7614 * parse ENTITY references declarations
7615 *
7616 * [68] EntityRef ::= '&' Name ';'
7617 *
7618 * [ WFC: Entity Declared ]
7619 * In a document without any DTD, a document with only an internal DTD
7620 * subset which contains no parameter entity references, or a document
7621 * with "standalone='yes'", the Name given in the entity reference
7622 * must match that in an entity declaration, except that well-formed
7623 * documents need not declare any of the following entities: amp, lt,
7624 * gt, apos, quot. The declaration of a parameter entity must precede
7625 * any reference to it. Similarly, the declaration of a general entity
7626 * must precede any reference to it which appears in a default value in an
7627 * attribute-list declaration. Note that if entities are declared in the
7628 * external subset or in external parameter entities, a non-validating
7629 * processor is not obligated to read and process their declarations;
7630 * for such documents, the rule that an entity must be declared is a
7631 * well-formedness constraint only if standalone='yes'.
7632 *
7633 * [ WFC: Parsed Entity ]
7634 * An entity reference must not contain the name of an unparsed entity
7635 *
7636 * Returns the xmlEntityPtr if found, or NULL otherwise.
7637 */
7638xmlEntityPtr
7639xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007640 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007641 xmlEntityPtr ent = NULL;
7642
7643 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007644 if (ctxt->instate == XML_PARSER_EOF)
7645 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007646
Daniel Veillard0161e632008-08-28 15:36:32 +00007647 if (RAW != '&')
7648 return(NULL);
7649 NEXT;
7650 name = xmlParseName(ctxt);
7651 if (name == NULL) {
7652 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7653 "xmlParseEntityRef: no name\n");
7654 return(NULL);
7655 }
7656 if (RAW != ';') {
7657 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7658 return(NULL);
7659 }
7660 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007661
Daniel Veillard0161e632008-08-28 15:36:32 +00007662 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007663 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007664 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007665 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7666 ent = xmlGetPredefinedEntity(name);
7667 if (ent != NULL)
7668 return(ent);
7669 }
Owen Taylor3473f882001-02-23 17:55:21 +00007670
Daniel Veillard0161e632008-08-28 15:36:32 +00007671 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007672 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007673 */
7674 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007675
Daniel Veillard0161e632008-08-28 15:36:32 +00007676 /*
7677 * Ask first SAX for entity resolution, otherwise try the
7678 * entities which may have stored in the parser context.
7679 */
7680 if (ctxt->sax != NULL) {
7681 if (ctxt->sax->getEntity != NULL)
7682 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007683 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007684 (ctxt->options & XML_PARSE_OLDSAX))
7685 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007686 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7687 (ctxt->userData==ctxt)) {
7688 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007689 }
7690 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007691 if (ctxt->instate == XML_PARSER_EOF)
7692 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007693 /*
7694 * [ WFC: Entity Declared ]
7695 * In a document without any DTD, a document with only an
7696 * internal DTD subset which contains no parameter entity
7697 * references, or a document with "standalone='yes'", the
7698 * Name given in the entity reference must match that in an
7699 * entity declaration, except that well-formed documents
7700 * need not declare any of the following entities: amp, lt,
7701 * gt, apos, quot.
7702 * The declaration of a parameter entity must precede any
7703 * reference to it.
7704 * Similarly, the declaration of a general entity must
7705 * precede any reference to it which appears in a default
7706 * value in an attribute-list declaration. Note that if
7707 * entities are declared in the external subset or in
7708 * external parameter entities, a non-validating processor
7709 * is not obligated to read and process their declarations;
7710 * for such documents, the rule that an entity must be
7711 * declared is a well-formedness constraint only if
7712 * standalone='yes'.
7713 */
7714 if (ent == NULL) {
7715 if ((ctxt->standalone == 1) ||
7716 ((ctxt->hasExternalSubset == 0) &&
7717 (ctxt->hasPErefs == 0))) {
7718 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7719 "Entity '%s' not defined\n", name);
7720 } else {
7721 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7722 "Entity '%s' not defined\n", name);
7723 if ((ctxt->inSubset == 0) &&
7724 (ctxt->sax != NULL) &&
7725 (ctxt->sax->reference != NULL)) {
7726 ctxt->sax->reference(ctxt->userData, name);
7727 }
7728 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007729 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007730 ctxt->valid = 0;
7731 }
7732
7733 /*
7734 * [ WFC: Parsed Entity ]
7735 * An entity reference must not contain the name of an
7736 * unparsed entity
7737 */
7738 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7739 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7740 "Entity reference to unparsed entity %s\n", name);
7741 }
7742
7743 /*
7744 * [ WFC: No External Entity References ]
7745 * Attribute values cannot contain direct or indirect
7746 * entity references to external entities.
7747 */
7748 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7749 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7750 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7751 "Attribute references external entity '%s'\n", name);
7752 }
7753 /*
7754 * [ WFC: No < in Attribute Values ]
7755 * The replacement text of any entity referred to directly or
7756 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007757 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007758 */
7759 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007760 (ent != NULL) &&
7761 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007762 if (((ent->checked & 1) || (ent->checked == 0)) &&
7763 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007764 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7765 "'<' in entity '%s' is not allowed in attributes values\n", name);
7766 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007767 }
7768
7769 /*
7770 * Internal check, no parameter entities here ...
7771 */
7772 else {
7773 switch (ent->etype) {
7774 case XML_INTERNAL_PARAMETER_ENTITY:
7775 case XML_EXTERNAL_PARAMETER_ENTITY:
7776 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7777 "Attempt to reference the parameter entity '%s'\n",
7778 name);
7779 break;
7780 default:
7781 break;
7782 }
7783 }
7784
7785 /*
7786 * [ WFC: No Recursion ]
7787 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007788 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007789 * Done somewhere else
7790 */
Owen Taylor3473f882001-02-23 17:55:21 +00007791 return(ent);
7792}
7793
7794/**
7795 * xmlParseStringEntityRef:
7796 * @ctxt: an XML parser context
7797 * @str: a pointer to an index in the string
7798 *
7799 * parse ENTITY references declarations, but this version parses it from
7800 * a string value.
7801 *
7802 * [68] EntityRef ::= '&' Name ';'
7803 *
7804 * [ WFC: Entity Declared ]
7805 * In a document without any DTD, a document with only an internal DTD
7806 * subset which contains no parameter entity references, or a document
7807 * with "standalone='yes'", the Name given in the entity reference
7808 * must match that in an entity declaration, except that well-formed
7809 * documents need not declare any of the following entities: amp, lt,
7810 * gt, apos, quot. The declaration of a parameter entity must precede
7811 * any reference to it. Similarly, the declaration of a general entity
7812 * must precede any reference to it which appears in a default value in an
7813 * attribute-list declaration. Note that if entities are declared in the
7814 * external subset or in external parameter entities, a non-validating
7815 * processor is not obligated to read and process their declarations;
7816 * for such documents, the rule that an entity must be declared is a
7817 * well-formedness constraint only if standalone='yes'.
7818 *
7819 * [ WFC: Parsed Entity ]
7820 * An entity reference must not contain the name of an unparsed entity
7821 *
7822 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7823 * is updated to the current location in the string.
7824 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007825static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007826xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7827 xmlChar *name;
7828 const xmlChar *ptr;
7829 xmlChar cur;
7830 xmlEntityPtr ent = NULL;
7831
7832 if ((str == NULL) || (*str == NULL))
7833 return(NULL);
7834 ptr = *str;
7835 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007836 if (cur != '&')
7837 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007838
Daniel Veillard0161e632008-08-28 15:36:32 +00007839 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007840 name = xmlParseStringName(ctxt, &ptr);
7841 if (name == NULL) {
7842 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7843 "xmlParseStringEntityRef: no name\n");
7844 *str = ptr;
7845 return(NULL);
7846 }
7847 if (*ptr != ';') {
7848 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007849 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007850 *str = ptr;
7851 return(NULL);
7852 }
7853 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007854
Owen Taylor3473f882001-02-23 17:55:21 +00007855
Daniel Veillard0161e632008-08-28 15:36:32 +00007856 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007857 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007858 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007859 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7860 ent = xmlGetPredefinedEntity(name);
7861 if (ent != NULL) {
7862 xmlFree(name);
7863 *str = ptr;
7864 return(ent);
7865 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007866 }
Owen Taylor3473f882001-02-23 17:55:21 +00007867
Daniel Veillard0161e632008-08-28 15:36:32 +00007868 /*
7869 * Increate the number of entity references parsed
7870 */
7871 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007872
Daniel Veillard0161e632008-08-28 15:36:32 +00007873 /*
7874 * Ask first SAX for entity resolution, otherwise try the
7875 * entities which may have stored in the parser context.
7876 */
7877 if (ctxt->sax != NULL) {
7878 if (ctxt->sax->getEntity != NULL)
7879 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007880 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7881 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007882 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7883 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007884 }
7885 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007886 if (ctxt->instate == XML_PARSER_EOF) {
7887 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007888 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007889 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007890
7891 /*
7892 * [ WFC: Entity Declared ]
7893 * In a document without any DTD, a document with only an
7894 * internal DTD subset which contains no parameter entity
7895 * references, or a document with "standalone='yes'", the
7896 * Name given in the entity reference must match that in an
7897 * entity declaration, except that well-formed documents
7898 * need not declare any of the following entities: amp, lt,
7899 * gt, apos, quot.
7900 * The declaration of a parameter entity must precede any
7901 * reference to it.
7902 * Similarly, the declaration of a general entity must
7903 * precede any reference to it which appears in a default
7904 * value in an attribute-list declaration. Note that if
7905 * entities are declared in the external subset or in
7906 * external parameter entities, a non-validating processor
7907 * is not obligated to read and process their declarations;
7908 * for such documents, the rule that an entity must be
7909 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007910 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007911 */
7912 if (ent == NULL) {
7913 if ((ctxt->standalone == 1) ||
7914 ((ctxt->hasExternalSubset == 0) &&
7915 (ctxt->hasPErefs == 0))) {
7916 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7917 "Entity '%s' not defined\n", name);
7918 } else {
7919 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7920 "Entity '%s' not defined\n",
7921 name);
7922 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007923 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007924 /* TODO ? check regressions ctxt->valid = 0; */
7925 }
7926
7927 /*
7928 * [ WFC: Parsed Entity ]
7929 * An entity reference must not contain the name of an
7930 * unparsed entity
7931 */
7932 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7933 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7934 "Entity reference to unparsed entity %s\n", name);
7935 }
7936
7937 /*
7938 * [ WFC: No External Entity References ]
7939 * Attribute values cannot contain direct or indirect
7940 * entity references to external entities.
7941 */
7942 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7943 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7944 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7945 "Attribute references external entity '%s'\n", name);
7946 }
7947 /*
7948 * [ WFC: No < in Attribute Values ]
7949 * The replacement text of any entity referred to directly or
7950 * indirectly in an attribute value (other than "&lt;") must
7951 * not contain a <.
7952 */
7953 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7954 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007955 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007956 (xmlStrchr(ent->content, '<'))) {
7957 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7958 "'<' in entity '%s' is not allowed in attributes values\n",
7959 name);
7960 }
7961
7962 /*
7963 * Internal check, no parameter entities here ...
7964 */
7965 else {
7966 switch (ent->etype) {
7967 case XML_INTERNAL_PARAMETER_ENTITY:
7968 case XML_EXTERNAL_PARAMETER_ENTITY:
7969 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7970 "Attempt to reference the parameter entity '%s'\n",
7971 name);
7972 break;
7973 default:
7974 break;
7975 }
7976 }
7977
7978 /*
7979 * [ WFC: No Recursion ]
7980 * A parsed entity must not contain a recursive reference
7981 * to itself, either directly or indirectly.
7982 * Done somewhere else
7983 */
7984
7985 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007986 *str = ptr;
7987 return(ent);
7988}
7989
7990/**
7991 * xmlParsePEReference:
7992 * @ctxt: an XML parser context
7993 *
7994 * parse PEReference declarations
7995 * The entity content is handled directly by pushing it's content as
7996 * a new input stream.
7997 *
7998 * [69] PEReference ::= '%' Name ';'
7999 *
8000 * [ WFC: No Recursion ]
8001 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008002 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008003 *
8004 * [ WFC: Entity Declared ]
8005 * In a document without any DTD, a document with only an internal DTD
8006 * subset which contains no parameter entity references, or a document
8007 * with "standalone='yes'", ... ... The declaration of a parameter
8008 * entity must precede any reference to it...
8009 *
8010 * [ VC: Entity Declared ]
8011 * In a document with an external subset or external parameter entities
8012 * with "standalone='no'", ... ... The declaration of a parameter entity
8013 * must precede any reference to it...
8014 *
8015 * [ WFC: In DTD ]
8016 * Parameter-entity references may only appear in the DTD.
8017 * NOTE: misleading but this is handled.
8018 */
8019void
Daniel Veillard8f597c32003-10-06 08:19:27 +00008020xmlParsePEReference(xmlParserCtxtPtr ctxt)
8021{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008022 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008023 xmlEntityPtr entity = NULL;
8024 xmlParserInputPtr input;
8025
Daniel Veillard0161e632008-08-28 15:36:32 +00008026 if (RAW != '%')
8027 return;
8028 NEXT;
8029 name = xmlParseName(ctxt);
8030 if (name == NULL) {
8031 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8032 "xmlParsePEReference: no name\n");
8033 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008034 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008035 if (RAW != ';') {
8036 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8037 return;
8038 }
8039
8040 NEXT;
8041
8042 /*
8043 * Increate the number of entity references parsed
8044 */
8045 ctxt->nbentities++;
8046
8047 /*
8048 * Request the entity from SAX
8049 */
8050 if ((ctxt->sax != NULL) &&
8051 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008052 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8053 if (ctxt->instate == XML_PARSER_EOF)
8054 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00008055 if (entity == NULL) {
8056 /*
8057 * [ WFC: Entity Declared ]
8058 * In a document without any DTD, a document with only an
8059 * internal DTD subset which contains no parameter entity
8060 * references, or a document with "standalone='yes'", ...
8061 * ... The declaration of a parameter entity must precede
8062 * any reference to it...
8063 */
8064 if ((ctxt->standalone == 1) ||
8065 ((ctxt->hasExternalSubset == 0) &&
8066 (ctxt->hasPErefs == 0))) {
8067 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8068 "PEReference: %%%s; not found\n",
8069 name);
8070 } else {
8071 /*
8072 * [ VC: Entity Declared ]
8073 * In a document with an external subset or external
8074 * parameter entities with "standalone='no'", ...
8075 * ... The declaration of a parameter entity must
8076 * precede any reference to it...
8077 */
8078 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8079 "PEReference: %%%s; not found\n",
8080 name, NULL);
8081 ctxt->valid = 0;
8082 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008083 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008084 } else {
8085 /*
8086 * Internal checking in case the entity quest barfed
8087 */
8088 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8089 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8090 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8091 "Internal: %%%s; is not a parameter entity\n",
8092 name, NULL);
8093 } else if (ctxt->input->free != deallocblankswrapper) {
8094 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8095 if (xmlPushInput(ctxt, input) < 0)
8096 return;
8097 } else {
8098 /*
8099 * TODO !!!
8100 * handle the extra spaces added before and after
8101 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8102 */
8103 input = xmlNewEntityInputStream(ctxt, entity);
8104 if (xmlPushInput(ctxt, input) < 0)
8105 return;
8106 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8107 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8108 (IS_BLANK_CH(NXT(5)))) {
8109 xmlParseTextDecl(ctxt);
8110 if (ctxt->errNo ==
8111 XML_ERR_UNSUPPORTED_ENCODING) {
8112 /*
8113 * The XML REC instructs us to stop parsing
8114 * right here
8115 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08008116 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00008117 return;
8118 }
8119 }
8120 }
8121 }
8122 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008123}
8124
8125/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008126 * xmlLoadEntityContent:
8127 * @ctxt: an XML parser context
8128 * @entity: an unloaded system entity
8129 *
8130 * Load the original content of the given system entity from the
8131 * ExternalID/SystemID given. This is to be used for Included in Literal
8132 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8133 *
8134 * Returns 0 in case of success and -1 in case of failure
8135 */
8136static int
8137xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8138 xmlParserInputPtr input;
8139 xmlBufferPtr buf;
8140 int l, c;
8141 int count = 0;
8142
8143 if ((ctxt == NULL) || (entity == NULL) ||
8144 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8145 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8146 (entity->content != NULL)) {
8147 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8148 "xmlLoadEntityContent parameter error");
8149 return(-1);
8150 }
8151
8152 if (xmlParserDebugEntities)
8153 xmlGenericError(xmlGenericErrorContext,
8154 "Reading %s entity content input\n", entity->name);
8155
8156 buf = xmlBufferCreate();
8157 if (buf == NULL) {
8158 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8159 "xmlLoadEntityContent parameter error");
8160 return(-1);
8161 }
8162
8163 input = xmlNewEntityInputStream(ctxt, entity);
8164 if (input == NULL) {
8165 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8166 "xmlLoadEntityContent input error");
8167 xmlBufferFree(buf);
8168 return(-1);
8169 }
8170
8171 /*
8172 * Push the entity as the current input, read char by char
8173 * saving to the buffer until the end of the entity or an error
8174 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008175 if (xmlPushInput(ctxt, input) < 0) {
8176 xmlBufferFree(buf);
8177 return(-1);
8178 }
8179
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008180 GROW;
8181 c = CUR_CHAR(l);
8182 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8183 (IS_CHAR(c))) {
8184 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008185 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008186 count = 0;
8187 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008188 if (ctxt->instate == XML_PARSER_EOF) {
8189 xmlBufferFree(buf);
8190 return(-1);
8191 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008192 }
8193 NEXTL(l);
8194 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008195 if (c == 0) {
8196 count = 0;
8197 GROW;
8198 if (ctxt->instate == XML_PARSER_EOF) {
8199 xmlBufferFree(buf);
8200 return(-1);
8201 }
8202 c = CUR_CHAR(l);
8203 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008204 }
8205
8206 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8207 xmlPopInput(ctxt);
8208 } else if (!IS_CHAR(c)) {
8209 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8210 "xmlLoadEntityContent: invalid char value %d\n",
8211 c);
8212 xmlBufferFree(buf);
8213 return(-1);
8214 }
8215 entity->content = buf->content;
8216 buf->content = NULL;
8217 xmlBufferFree(buf);
8218
8219 return(0);
8220}
8221
8222/**
Owen Taylor3473f882001-02-23 17:55:21 +00008223 * xmlParseStringPEReference:
8224 * @ctxt: an XML parser context
8225 * @str: a pointer to an index in the string
8226 *
8227 * parse PEReference declarations
8228 *
8229 * [69] PEReference ::= '%' Name ';'
8230 *
8231 * [ WFC: No Recursion ]
8232 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008233 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008234 *
8235 * [ WFC: Entity Declared ]
8236 * In a document without any DTD, a document with only an internal DTD
8237 * subset which contains no parameter entity references, or a document
8238 * with "standalone='yes'", ... ... The declaration of a parameter
8239 * entity must precede any reference to it...
8240 *
8241 * [ VC: Entity Declared ]
8242 * In a document with an external subset or external parameter entities
8243 * with "standalone='no'", ... ... The declaration of a parameter entity
8244 * must precede any reference to it...
8245 *
8246 * [ WFC: In DTD ]
8247 * Parameter-entity references may only appear in the DTD.
8248 * NOTE: misleading but this is handled.
8249 *
8250 * Returns the string of the entity content.
8251 * str is updated to the current value of the index
8252 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008253static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008254xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8255 const xmlChar *ptr;
8256 xmlChar cur;
8257 xmlChar *name;
8258 xmlEntityPtr entity = NULL;
8259
8260 if ((str == NULL) || (*str == NULL)) return(NULL);
8261 ptr = *str;
8262 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008263 if (cur != '%')
8264 return(NULL);
8265 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008266 name = xmlParseStringName(ctxt, &ptr);
8267 if (name == NULL) {
8268 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8269 "xmlParseStringPEReference: no name\n");
8270 *str = ptr;
8271 return(NULL);
8272 }
8273 cur = *ptr;
8274 if (cur != ';') {
8275 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8276 xmlFree(name);
8277 *str = ptr;
8278 return(NULL);
8279 }
8280 ptr++;
8281
8282 /*
8283 * Increate the number of entity references parsed
8284 */
8285 ctxt->nbentities++;
8286
8287 /*
8288 * Request the entity from SAX
8289 */
8290 if ((ctxt->sax != NULL) &&
8291 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008292 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8293 if (ctxt->instate == XML_PARSER_EOF) {
8294 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008295 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008296 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008297 if (entity == NULL) {
8298 /*
8299 * [ WFC: Entity Declared ]
8300 * In a document without any DTD, a document with only an
8301 * internal DTD subset which contains no parameter entity
8302 * references, or a document with "standalone='yes'", ...
8303 * ... The declaration of a parameter entity must precede
8304 * any reference to it...
8305 */
8306 if ((ctxt->standalone == 1) ||
8307 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8308 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8309 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008310 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008311 /*
8312 * [ VC: Entity Declared ]
8313 * In a document with an external subset or external
8314 * parameter entities with "standalone='no'", ...
8315 * ... The declaration of a parameter entity must
8316 * precede any reference to it...
8317 */
8318 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8319 "PEReference: %%%s; not found\n",
8320 name, NULL);
8321 ctxt->valid = 0;
8322 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008323 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008324 } else {
8325 /*
8326 * Internal checking in case the entity quest barfed
8327 */
8328 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8329 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8330 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8331 "%%%s; is not a parameter entity\n",
8332 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008333 }
8334 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008335 ctxt->hasPErefs = 1;
8336 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008337 *str = ptr;
8338 return(entity);
8339}
8340
8341/**
8342 * xmlParseDocTypeDecl:
8343 * @ctxt: an XML parser context
8344 *
8345 * parse a DOCTYPE declaration
8346 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008347 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008348 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8349 *
8350 * [ VC: Root Element Type ]
8351 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008352 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008353 */
8354
8355void
8356xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008357 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008358 xmlChar *ExternalID = NULL;
8359 xmlChar *URI = NULL;
8360
8361 /*
8362 * We know that '<!DOCTYPE' has been detected.
8363 */
8364 SKIP(9);
8365
8366 SKIP_BLANKS;
8367
8368 /*
8369 * Parse the DOCTYPE name.
8370 */
8371 name = xmlParseName(ctxt);
8372 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008373 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8374 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008375 }
8376 ctxt->intSubName = name;
8377
8378 SKIP_BLANKS;
8379
8380 /*
8381 * Check for SystemID and ExternalID
8382 */
8383 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8384
8385 if ((URI != NULL) || (ExternalID != NULL)) {
8386 ctxt->hasExternalSubset = 1;
8387 }
8388 ctxt->extSubURI = URI;
8389 ctxt->extSubSystem = ExternalID;
8390
8391 SKIP_BLANKS;
8392
8393 /*
8394 * Create and update the internal subset.
8395 */
8396 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8397 (!ctxt->disableSAX))
8398 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008399 if (ctxt->instate == XML_PARSER_EOF)
8400 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008401
8402 /*
8403 * Is there any internal subset declarations ?
8404 * they are handled separately in xmlParseInternalSubset()
8405 */
8406 if (RAW == '[')
8407 return;
8408
8409 /*
8410 * We should be at the end of the DOCTYPE declaration.
8411 */
8412 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008413 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008414 }
8415 NEXT;
8416}
8417
8418/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008419 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008420 * @ctxt: an XML parser context
8421 *
8422 * parse the internal subset declaration
8423 *
8424 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8425 */
8426
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008427static void
Owen Taylor3473f882001-02-23 17:55:21 +00008428xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8429 /*
8430 * Is there any DTD definition ?
8431 */
8432 if (RAW == '[') {
8433 ctxt->instate = XML_PARSER_DTD;
8434 NEXT;
8435 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008436 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008437 * PEReferences.
8438 * Subsequence (markupdecl | PEReference | S)*
8439 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008440 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008441 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008442 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008443
8444 SKIP_BLANKS;
8445 xmlParseMarkupDecl(ctxt);
8446 xmlParsePEReference(ctxt);
8447
8448 /*
8449 * Pop-up of finished entities.
8450 */
8451 while ((RAW == 0) && (ctxt->inputNr > 1))
8452 xmlPopInput(ctxt);
8453
8454 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008455 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008456 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008457 break;
8458 }
8459 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008460 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008461 NEXT;
8462 SKIP_BLANKS;
8463 }
8464 }
8465
8466 /*
8467 * We should be at the end of the DOCTYPE declaration.
8468 */
8469 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008470 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Daniel Veillarda7a94612016-02-09 12:55:29 +01008471 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008472 }
8473 NEXT;
8474}
8475
Daniel Veillard81273902003-09-30 00:43:48 +00008476#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008477/**
8478 * xmlParseAttribute:
8479 * @ctxt: an XML parser context
8480 * @value: a xmlChar ** used to store the value of the attribute
8481 *
8482 * parse an attribute
8483 *
8484 * [41] Attribute ::= Name Eq AttValue
8485 *
8486 * [ WFC: No External Entity References ]
8487 * Attribute values cannot contain direct or indirect entity references
8488 * to external entities.
8489 *
8490 * [ WFC: No < in Attribute Values ]
8491 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008492 * an attribute value (other than "&lt;") must not contain a <.
8493 *
Owen Taylor3473f882001-02-23 17:55:21 +00008494 * [ VC: Attribute Value Type ]
8495 * The attribute must have been declared; the value must be of the type
8496 * declared for it.
8497 *
8498 * [25] Eq ::= S? '=' S?
8499 *
8500 * With namespace:
8501 *
8502 * [NS 11] Attribute ::= QName Eq AttValue
8503 *
8504 * Also the case QName == xmlns:??? is handled independently as a namespace
8505 * definition.
8506 *
8507 * Returns the attribute name, and the value in *value.
8508 */
8509
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008510const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008511xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008512 const xmlChar *name;
8513 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008514
8515 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008516 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008517 name = xmlParseName(ctxt);
8518 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008519 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008520 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008521 return(NULL);
8522 }
8523
8524 /*
8525 * read the value
8526 */
8527 SKIP_BLANKS;
8528 if (RAW == '=') {
8529 NEXT;
8530 SKIP_BLANKS;
8531 val = xmlParseAttValue(ctxt);
8532 ctxt->instate = XML_PARSER_CONTENT;
8533 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008534 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008535 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008536 return(NULL);
8537 }
8538
8539 /*
8540 * Check that xml:lang conforms to the specification
8541 * No more registered as an error, just generate a warning now
8542 * since this was deprecated in XML second edition
8543 */
8544 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8545 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008546 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8547 "Malformed value for xml:lang : %s\n",
8548 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008549 }
8550 }
8551
8552 /*
8553 * Check that xml:space conforms to the specification
8554 */
8555 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8556 if (xmlStrEqual(val, BAD_CAST "default"))
8557 *(ctxt->space) = 0;
8558 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8559 *(ctxt->space) = 1;
8560 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008561 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008562"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008563 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008564 }
8565 }
8566
8567 *value = val;
8568 return(name);
8569}
8570
8571/**
8572 * xmlParseStartTag:
8573 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008574 *
Owen Taylor3473f882001-02-23 17:55:21 +00008575 * parse a start of tag either for rule element or
8576 * EmptyElement. In both case we don't parse the tag closing chars.
8577 *
8578 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8579 *
8580 * [ WFC: Unique Att Spec ]
8581 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008582 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008583 *
8584 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8585 *
8586 * [ WFC: Unique Att Spec ]
8587 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008588 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008589 *
8590 * With namespace:
8591 *
8592 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8593 *
8594 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8595 *
8596 * Returns the element name parsed
8597 */
8598
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008599const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008600xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008601 const xmlChar *name;
8602 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008603 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008604 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008605 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008606 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008607 int i;
8608
8609 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008610 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008611
8612 name = xmlParseName(ctxt);
8613 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008614 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008615 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008616 return(NULL);
8617 }
8618
8619 /*
8620 * Now parse the attributes, it ends up with the ending
8621 *
8622 * (S Attribute)* S?
8623 */
8624 SKIP_BLANKS;
8625 GROW;
8626
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008627 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008628 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008629 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008630 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008631 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008632
8633 attname = xmlParseAttribute(ctxt, &attvalue);
8634 if ((attname != NULL) && (attvalue != NULL)) {
8635 /*
8636 * [ WFC: Unique Att Spec ]
8637 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008638 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008639 */
8640 for (i = 0; i < nbatts;i += 2) {
8641 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008642 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008643 xmlFree(attvalue);
8644 goto failed;
8645 }
8646 }
Owen Taylor3473f882001-02-23 17:55:21 +00008647 /*
8648 * Add the pair to atts
8649 */
8650 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008651 maxatts = 22; /* allow for 10 attrs by default */
8652 atts = (const xmlChar **)
8653 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008654 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008655 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008656 if (attvalue != NULL)
8657 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008658 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008659 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008660 ctxt->atts = atts;
8661 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008662 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008663 const xmlChar **n;
8664
Owen Taylor3473f882001-02-23 17:55:21 +00008665 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008666 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008667 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008668 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008669 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008670 if (attvalue != NULL)
8671 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008672 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008673 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008674 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008675 ctxt->atts = atts;
8676 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008677 }
8678 atts[nbatts++] = attname;
8679 atts[nbatts++] = attvalue;
8680 atts[nbatts] = NULL;
8681 atts[nbatts + 1] = NULL;
8682 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008683 if (attvalue != NULL)
8684 xmlFree(attvalue);
8685 }
8686
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008687failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008688
Daniel Veillard3772de32002-12-17 10:31:45 +00008689 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008690 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8691 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008692 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008693 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8694 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008695 }
8696 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008697 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8698 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008699 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8700 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008701 break;
8702 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008703 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008704 GROW;
8705 }
8706
8707 /*
8708 * SAX: Start of Element !
8709 */
8710 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008711 (!ctxt->disableSAX)) {
8712 if (nbatts > 0)
8713 ctxt->sax->startElement(ctxt->userData, name, atts);
8714 else
8715 ctxt->sax->startElement(ctxt->userData, name, NULL);
8716 }
Owen Taylor3473f882001-02-23 17:55:21 +00008717
8718 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008719 /* Free only the content strings */
8720 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008721 if (atts[i] != NULL)
8722 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008723 }
8724 return(name);
8725}
8726
8727/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008728 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008729 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730 * @line: line of the start tag
8731 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008732 *
8733 * parse an end of tag
8734 *
8735 * [42] ETag ::= '</' Name S? '>'
8736 *
8737 * With namespace
8738 *
8739 * [NS 9] ETag ::= '</' QName S? '>'
8740 */
8741
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008742static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008743xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008744 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008745
8746 GROW;
8747 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008748 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008749 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008750 return;
8751 }
8752 SKIP(2);
8753
Daniel Veillard46de64e2002-05-29 08:21:33 +00008754 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008755
8756 /*
8757 * We should definitely be at the ending "S? '>'" part
8758 */
8759 GROW;
8760 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008761 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008762 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008763 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008764 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008765
8766 /*
8767 * [ WFC: Element Type Match ]
8768 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008769 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008770 *
8771 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008772 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008773 if (name == NULL) name = BAD_CAST "unparseable";
8774 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008775 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008776 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008777 }
8778
8779 /*
8780 * SAX: End of Tag
8781 */
8782 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8783 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008784 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008785
Daniel Veillarde57ec792003-09-10 10:50:59 +00008786 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008787 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008788 return;
8789}
8790
8791/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008792 * xmlParseEndTag:
8793 * @ctxt: an XML parser context
8794 *
8795 * parse an end of tag
8796 *
8797 * [42] ETag ::= '</' Name S? '>'
8798 *
8799 * With namespace
8800 *
8801 * [NS 9] ETag ::= '</' QName S? '>'
8802 */
8803
8804void
8805xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008806 xmlParseEndTag1(ctxt, 0);
8807}
Daniel Veillard81273902003-09-30 00:43:48 +00008808#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008809
8810/************************************************************************
8811 * *
8812 * SAX 2 specific operations *
8813 * *
8814 ************************************************************************/
8815
Daniel Veillard0fb18932003-09-07 09:14:37 +00008816/*
8817 * xmlGetNamespace:
8818 * @ctxt: an XML parser context
8819 * @prefix: the prefix to lookup
8820 *
8821 * Lookup the namespace name for the @prefix (which ca be NULL)
8822 * The prefix must come from the @ctxt->dict dictionnary
8823 *
8824 * Returns the namespace name or NULL if not bound
8825 */
8826static const xmlChar *
8827xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8828 int i;
8829
Daniel Veillarde57ec792003-09-10 10:50:59 +00008830 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008831 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008832 if (ctxt->nsTab[i] == prefix) {
8833 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8834 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008835 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008836 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008837 return(NULL);
8838}
8839
8840/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008841 * xmlParseQName:
8842 * @ctxt: an XML parser context
8843 * @prefix: pointer to store the prefix part
8844 *
8845 * parse an XML Namespace QName
8846 *
8847 * [6] QName ::= (Prefix ':')? LocalPart
8848 * [7] Prefix ::= NCName
8849 * [8] LocalPart ::= NCName
8850 *
8851 * Returns the Name parsed or NULL
8852 */
8853
8854static const xmlChar *
8855xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8856 const xmlChar *l, *p;
8857
8858 GROW;
8859
8860 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008861 if (l == NULL) {
8862 if (CUR == ':') {
8863 l = xmlParseName(ctxt);
8864 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008865 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008866 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008867 *prefix = NULL;
8868 return(l);
8869 }
8870 }
8871 return(NULL);
8872 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008873 if (CUR == ':') {
8874 NEXT;
8875 p = l;
8876 l = xmlParseNCName(ctxt);
8877 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008878 xmlChar *tmp;
8879
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008880 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8881 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008882 l = xmlParseNmtoken(ctxt);
8883 if (l == NULL)
8884 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8885 else {
8886 tmp = xmlBuildQName(l, p, NULL, 0);
8887 xmlFree((char *)l);
8888 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008889 p = xmlDictLookup(ctxt->dict, tmp, -1);
8890 if (tmp != NULL) xmlFree(tmp);
8891 *prefix = NULL;
8892 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008893 }
8894 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008895 xmlChar *tmp;
8896
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008897 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8898 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008899 NEXT;
8900 tmp = (xmlChar *) xmlParseName(ctxt);
8901 if (tmp != NULL) {
8902 tmp = xmlBuildQName(tmp, l, NULL, 0);
8903 l = xmlDictLookup(ctxt->dict, tmp, -1);
8904 if (tmp != NULL) xmlFree(tmp);
8905 *prefix = p;
8906 return(l);
8907 }
8908 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8909 l = xmlDictLookup(ctxt->dict, tmp, -1);
8910 if (tmp != NULL) xmlFree(tmp);
8911 *prefix = p;
8912 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008913 }
8914 *prefix = p;
8915 } else
8916 *prefix = NULL;
8917 return(l);
8918}
8919
8920/**
8921 * xmlParseQNameAndCompare:
8922 * @ctxt: an XML parser context
8923 * @name: the localname
8924 * @prefix: the prefix, if any.
8925 *
8926 * parse an XML name and compares for match
8927 * (specialized for endtag parsing)
8928 *
8929 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8930 * and the name for mismatch
8931 */
8932
8933static const xmlChar *
8934xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8935 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008936 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008937 const xmlChar *in;
8938 const xmlChar *ret;
8939 const xmlChar *prefix2;
8940
8941 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8942
8943 GROW;
8944 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008945
Daniel Veillard0fb18932003-09-07 09:14:37 +00008946 cmp = prefix;
8947 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008948 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008949 ++cmp;
8950 }
8951 if ((*cmp == 0) && (*in == ':')) {
8952 in++;
8953 cmp = name;
8954 while (*in != 0 && *in == *cmp) {
8955 ++in;
8956 ++cmp;
8957 }
William M. Brack76e95df2003-10-18 16:20:14 +00008958 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008959 /* success */
8960 ctxt->input->cur = in;
8961 return((const xmlChar*) 1);
8962 }
8963 }
8964 /*
8965 * all strings coms from the dictionary, equality can be done directly
8966 */
8967 ret = xmlParseQName (ctxt, &prefix2);
8968 if ((ret == name) && (prefix == prefix2))
8969 return((const xmlChar*) 1);
8970 return ret;
8971}
8972
8973/**
8974 * xmlParseAttValueInternal:
8975 * @ctxt: an XML parser context
8976 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008977 * @alloc: whether the attribute was reallocated as a new string
8978 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008979 *
8980 * parse a value for an attribute.
8981 * NOTE: if no normalization is needed, the routine will return pointers
8982 * directly from the data buffer.
8983 *
8984 * 3.3.3 Attribute-Value Normalization:
8985 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008986 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008987 * - a character reference is processed by appending the referenced
8988 * character to the attribute value
8989 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008990 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008991 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8992 * appending #x20 to the normalized value, except that only a single
8993 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008994 * parsed entity or the literal entity value of an internal parsed entity
8995 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008996 * If the declared value is not CDATA, then the XML processor must further
8997 * process the normalized attribute value by discarding any leading and
8998 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008999 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009000 * All attributes for which no declaration has been read should be treated
9001 * by a non-validating parser as if declared CDATA.
9002 *
9003 * Returns the AttValue parsed or NULL. The value has to be freed by the
9004 * caller if it was copied, this can be detected by val[*len] == 0.
9005 */
9006
9007static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009008xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9009 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009010{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009011 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009012 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009013 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08009014 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009015
9016 GROW;
9017 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08009018 line = ctxt->input->line;
9019 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009020 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009021 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009022 return (NULL);
9023 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009024 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009025
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009026 /*
9027 * try to handle in this routine the most common case where no
9028 * allocation of a new string is required and where content is
9029 * pure ASCII.
9030 */
9031 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009032 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009033 end = ctxt->input->end;
9034 start = in;
9035 if (in >= end) {
9036 const xmlChar *oldbase = ctxt->input->base;
9037 GROW;
9038 if (oldbase != ctxt->input->base) {
9039 long delta = ctxt->input->base - oldbase;
9040 start = start + delta;
9041 in = in + delta;
9042 }
9043 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009044 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009045 if (normalize) {
9046 /*
9047 * Skip any leading spaces
9048 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009049 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009050 ((*in == 0x20) || (*in == 0x9) ||
9051 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009052 if (*in == 0xA) {
9053 line++; col = 1;
9054 } else {
9055 col++;
9056 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009057 in++;
9058 start = in;
9059 if (in >= end) {
9060 const xmlChar *oldbase = ctxt->input->base;
9061 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009062 if (ctxt->instate == XML_PARSER_EOF)
9063 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009064 if (oldbase != ctxt->input->base) {
9065 long delta = ctxt->input->base - oldbase;
9066 start = start + delta;
9067 in = in + delta;
9068 }
9069 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009070 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9071 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9072 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009073 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009074 return(NULL);
9075 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009076 }
9077 }
9078 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9079 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009080 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009081 if ((*in++ == 0x20) && (*in == 0x20)) break;
9082 if (in >= end) {
9083 const xmlChar *oldbase = ctxt->input->base;
9084 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009085 if (ctxt->instate == XML_PARSER_EOF)
9086 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009087 if (oldbase != ctxt->input->base) {
9088 long delta = ctxt->input->base - oldbase;
9089 start = start + delta;
9090 in = in + delta;
9091 }
9092 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009093 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9094 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9095 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009096 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009097 return(NULL);
9098 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009099 }
9100 }
9101 last = in;
9102 /*
9103 * skip the trailing blanks
9104 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009105 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009106 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009107 ((*in == 0x20) || (*in == 0x9) ||
9108 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009109 if (*in == 0xA) {
9110 line++, col = 1;
9111 } else {
9112 col++;
9113 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009114 in++;
9115 if (in >= end) {
9116 const xmlChar *oldbase = ctxt->input->base;
9117 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009118 if (ctxt->instate == XML_PARSER_EOF)
9119 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009120 if (oldbase != ctxt->input->base) {
9121 long delta = ctxt->input->base - oldbase;
9122 start = start + delta;
9123 in = in + delta;
9124 last = last + delta;
9125 }
9126 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009127 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9128 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9129 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009130 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009131 return(NULL);
9132 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009133 }
9134 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009135 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9136 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9137 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009138 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009139 return(NULL);
9140 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009141 if (*in != limit) goto need_complex;
9142 } else {
9143 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9144 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9145 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009146 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009147 if (in >= end) {
9148 const xmlChar *oldbase = ctxt->input->base;
9149 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009150 if (ctxt->instate == XML_PARSER_EOF)
9151 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009152 if (oldbase != ctxt->input->base) {
9153 long delta = ctxt->input->base - oldbase;
9154 start = start + delta;
9155 in = in + delta;
9156 }
9157 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009158 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9159 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9160 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009161 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009162 return(NULL);
9163 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009164 }
9165 }
9166 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009167 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9168 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9169 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009170 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009171 return(NULL);
9172 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009173 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009174 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009175 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009176 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009177 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009178 *len = last - start;
9179 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009180 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009181 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009182 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009183 }
9184 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009185 ctxt->input->line = line;
9186 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009187 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009188 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009189need_complex:
9190 if (alloc) *alloc = 1;
9191 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009192}
9193
9194/**
9195 * xmlParseAttribute2:
9196 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009197 * @pref: the element prefix
9198 * @elem: the element name
9199 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009200 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009201 * @len: an int * to save the length of the attribute
9202 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009203 *
9204 * parse an attribute in the new SAX2 framework.
9205 *
9206 * Returns the attribute name, and the value in *value, .
9207 */
9208
9209static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009210xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009211 const xmlChar * pref, const xmlChar * elem,
9212 const xmlChar ** prefix, xmlChar ** value,
9213 int *len, int *alloc)
9214{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009215 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009216 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009217 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009218
9219 *value = NULL;
9220 GROW;
9221 name = xmlParseQName(ctxt, prefix);
9222 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009223 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9224 "error parsing attribute name\n");
9225 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009226 }
9227
9228 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009229 * get the type if needed
9230 */
9231 if (ctxt->attsSpecial != NULL) {
9232 int type;
9233
9234 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009235 pref, elem, *prefix, name);
9236 if (type != 0)
9237 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009238 }
9239
9240 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009241 * read the value
9242 */
9243 SKIP_BLANKS;
9244 if (RAW == '=') {
9245 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009246 SKIP_BLANKS;
9247 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9248 if (normalize) {
9249 /*
9250 * Sometimes a second normalisation pass for spaces is needed
9251 * but that only happens if charrefs or entities refernces
9252 * have been used in the attribute value, i.e. the attribute
9253 * value have been extracted in an allocated string already.
9254 */
9255 if (*alloc) {
9256 const xmlChar *val2;
9257
9258 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009259 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009260 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009261 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009262 }
9263 }
9264 }
9265 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009266 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009267 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9268 "Specification mandate value for attribute %s\n",
9269 name);
9270 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009271 }
9272
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009273 if (*prefix == ctxt->str_xml) {
9274 /*
9275 * Check that xml:lang conforms to the specification
9276 * No more registered as an error, just generate a warning now
9277 * since this was deprecated in XML second edition
9278 */
9279 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9280 internal_val = xmlStrndup(val, *len);
9281 if (!xmlCheckLanguageID(internal_val)) {
9282 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9283 "Malformed value for xml:lang : %s\n",
9284 internal_val, NULL);
9285 }
9286 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009287
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009288 /*
9289 * Check that xml:space conforms to the specification
9290 */
9291 if (xmlStrEqual(name, BAD_CAST "space")) {
9292 internal_val = xmlStrndup(val, *len);
9293 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9294 *(ctxt->space) = 0;
9295 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9296 *(ctxt->space) = 1;
9297 else {
9298 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9299 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9300 internal_val, NULL);
9301 }
9302 }
9303 if (internal_val) {
9304 xmlFree(internal_val);
9305 }
9306 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009307
9308 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009309 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009310}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009311/**
9312 * xmlParseStartTag2:
9313 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009314 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009315 * parse a start of tag either for rule element or
9316 * EmptyElement. In both case we don't parse the tag closing chars.
9317 * This routine is called when running SAX2 parsing
9318 *
9319 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9320 *
9321 * [ WFC: Unique Att Spec ]
9322 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009323 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009324 *
9325 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9326 *
9327 * [ WFC: Unique Att Spec ]
9328 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009329 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009330 *
9331 * With namespace:
9332 *
9333 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9334 *
9335 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9336 *
9337 * Returns the element name parsed
9338 */
9339
9340static const xmlChar *
9341xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009342 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009343 const xmlChar *localname;
9344 const xmlChar *prefix;
9345 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009346 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009347 const xmlChar *nsname;
9348 xmlChar *attvalue;
9349 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009350 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009351 int nratts, nbatts, nbdef;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009352 int i, j, nbNs, attval, oldline, oldcol, inputNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009353 const xmlChar *base;
9354 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009355 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009356
9357 if (RAW != '<') return(NULL);
9358 NEXT1;
9359
9360 /*
9361 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9362 * point since the attribute values may be stored as pointers to
9363 * the buffer and calling SHRINK would destroy them !
9364 * The Shrinking is only possible once the full set of attribute
9365 * callbacks have been done.
9366 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009367reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009368 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009369 base = ctxt->input->base;
9370 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009371 inputNr = ctxt->inputNr;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009372 oldline = ctxt->input->line;
9373 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009374 nbatts = 0;
9375 nratts = 0;
9376 nbdef = 0;
9377 nbNs = 0;
9378 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009379 /* Forget any namespaces added during an earlier parse of this element. */
9380 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009381
9382 localname = xmlParseQName(ctxt, &prefix);
9383 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009384 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9385 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009386 return(NULL);
9387 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009388 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009389
9390 /*
9391 * Now parse the attributes, it ends up with the ending
9392 *
9393 * (S Attribute)* S?
9394 */
9395 SKIP_BLANKS;
9396 GROW;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009397 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9398 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009399
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009400 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009401 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009402 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009403 const xmlChar *q = CUR_PTR;
9404 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009405 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009406
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009407 attname = xmlParseAttribute2(ctxt, prefix, localname,
9408 &aprefix, &attvalue, &len, &alloc);
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009409 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr)) {
Daniel Veillarddcec6722006-10-15 20:32:53 +00009410 if ((attvalue != NULL) && (alloc != 0))
9411 xmlFree(attvalue);
9412 attvalue = NULL;
9413 goto base_changed;
9414 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009415 if ((attname != NULL) && (attvalue != NULL)) {
9416 if (len < 0) len = xmlStrlen(attvalue);
9417 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009418 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9419 xmlURIPtr uri;
9420
Daniel Veillardc836ba62014-07-14 16:39:50 +08009421 if (URL == NULL) {
9422 xmlErrMemory(ctxt, "dictionary allocation failure");
9423 if ((attvalue != NULL) && (alloc != 0))
9424 xmlFree(attvalue);
9425 return(NULL);
9426 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009427 if (*URL != 0) {
9428 uri = xmlParseURI((const char *) URL);
9429 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009430 xmlNsErr(ctxt, XML_WAR_NS_URI,
9431 "xmlns: '%s' is not a valid URI\n",
9432 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009433 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009434 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009435 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9436 "xmlns: URI %s is not absolute\n",
9437 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009438 }
9439 xmlFreeURI(uri);
9440 }
Daniel Veillard37334572008-07-31 08:20:02 +00009441 if (URL == ctxt->str_xml_ns) {
9442 if (attname != ctxt->str_xml) {
9443 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9444 "xml namespace URI cannot be the default namespace\n",
9445 NULL, NULL, NULL);
9446 }
9447 goto skip_default_ns;
9448 }
9449 if ((len == 29) &&
9450 (xmlStrEqual(URL,
9451 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9452 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9453 "reuse of the xmlns namespace name is forbidden\n",
9454 NULL, NULL, NULL);
9455 goto skip_default_ns;
9456 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009457 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009458 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009459 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009460 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009461 for (j = 1;j <= nbNs;j++)
9462 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9463 break;
9464 if (j <= nbNs)
9465 xmlErrAttributeDup(ctxt, NULL, attname);
9466 else
9467 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009468skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009469 if (alloc != 0) xmlFree(attvalue);
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009470 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9471 break;
9472 if (!IS_BLANK_CH(RAW)) {
9473 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9474 "attributes construct error\n");
9475 break;
9476 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009477 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009478 continue;
9479 }
9480 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009481 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9482 xmlURIPtr uri;
9483
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009484 if (attname == ctxt->str_xml) {
9485 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009486 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9487 "xml namespace prefix mapped to wrong URI\n",
9488 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009489 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009490 /*
9491 * Do not keep a namespace definition node
9492 */
Daniel Veillard37334572008-07-31 08:20:02 +00009493 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009494 }
Daniel Veillard37334572008-07-31 08:20:02 +00009495 if (URL == ctxt->str_xml_ns) {
9496 if (attname != ctxt->str_xml) {
9497 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9498 "xml namespace URI mapped to wrong prefix\n",
9499 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009500 }
Daniel Veillard37334572008-07-31 08:20:02 +00009501 goto skip_ns;
9502 }
9503 if (attname == ctxt->str_xmlns) {
9504 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9505 "redefinition of the xmlns prefix is forbidden\n",
9506 NULL, NULL, NULL);
9507 goto skip_ns;
9508 }
9509 if ((len == 29) &&
9510 (xmlStrEqual(URL,
9511 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9512 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9513 "reuse of the xmlns namespace name is forbidden\n",
9514 NULL, NULL, NULL);
9515 goto skip_ns;
9516 }
9517 if ((URL == NULL) || (URL[0] == 0)) {
9518 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9519 "xmlns:%s: Empty XML namespace is not allowed\n",
9520 attname, NULL, NULL);
9521 goto skip_ns;
9522 } else {
9523 uri = xmlParseURI((const char *) URL);
9524 if (uri == NULL) {
9525 xmlNsErr(ctxt, XML_WAR_NS_URI,
9526 "xmlns:%s: '%s' is not a valid URI\n",
9527 attname, URL, NULL);
9528 } else {
9529 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9530 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9531 "xmlns:%s: URI %s is not absolute\n",
9532 attname, URL, NULL);
9533 }
9534 xmlFreeURI(uri);
9535 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009536 }
9537
Daniel Veillard0fb18932003-09-07 09:14:37 +00009538 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009539 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009540 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009541 for (j = 1;j <= nbNs;j++)
9542 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9543 break;
9544 if (j <= nbNs)
9545 xmlErrAttributeDup(ctxt, aprefix, attname);
9546 else
9547 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009548skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009549 if (alloc != 0) xmlFree(attvalue);
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009550 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9551 break;
9552 if (!IS_BLANK_CH(RAW)) {
9553 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9554 "attributes construct error\n");
9555 break;
9556 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009557 SKIP_BLANKS;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009558 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9559 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009560 continue;
9561 }
9562
9563 /*
9564 * Add the pair to atts
9565 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009566 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9567 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009568 if (attvalue[len] == 0)
9569 xmlFree(attvalue);
9570 goto failed;
9571 }
9572 maxatts = ctxt->maxatts;
9573 atts = ctxt->atts;
9574 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009575 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009576 atts[nbatts++] = attname;
9577 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009578 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009579 atts[nbatts++] = attvalue;
9580 attvalue += len;
9581 atts[nbatts++] = attvalue;
9582 /*
9583 * tag if some deallocation is needed
9584 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009585 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009586 } else {
9587 if ((attvalue != NULL) && (attvalue[len] == 0))
9588 xmlFree(attvalue);
9589 }
9590
Daniel Veillard37334572008-07-31 08:20:02 +00009591failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009592
9593 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009594 if (ctxt->instate == XML_PARSER_EOF)
9595 break;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009596 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9597 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009598 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9599 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009600 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009601 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9602 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009603 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009604 }
9605 SKIP_BLANKS;
9606 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9607 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009608 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009609 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009610 break;
9611 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009612 GROW;
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009613 if ((ctxt->input->base != base) || (inputNr != ctxt->inputNr))
9614 goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009615 }
9616
Daniel Veillard0fb18932003-09-07 09:14:37 +00009617 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009618 * The attributes defaulting
9619 */
9620 if (ctxt->attsDefault != NULL) {
9621 xmlDefAttrsPtr defaults;
9622
9623 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9624 if (defaults != NULL) {
9625 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009626 attname = defaults->values[5 * i];
9627 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009628
9629 /*
9630 * special work for namespaces defaulted defs
9631 */
9632 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9633 /*
9634 * check that it's not a defined namespace
9635 */
9636 for (j = 1;j <= nbNs;j++)
9637 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9638 break;
9639 if (j <= nbNs) continue;
9640
9641 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009642 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009643 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009644 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009645 nbNs++;
9646 }
9647 } else if (aprefix == ctxt->str_xmlns) {
9648 /*
9649 * check that it's not a defined namespace
9650 */
9651 for (j = 1;j <= nbNs;j++)
9652 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9653 break;
9654 if (j <= nbNs) continue;
9655
9656 nsname = xmlGetNamespace(ctxt, attname);
9657 if (nsname != defaults->values[2]) {
9658 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009659 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009660 nbNs++;
9661 }
9662 } else {
9663 /*
9664 * check that it's not a defined attribute
9665 */
9666 for (j = 0;j < nbatts;j+=5) {
9667 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9668 break;
9669 }
9670 if (j < nbatts) continue;
9671
9672 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9673 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009674 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009675 }
9676 maxatts = ctxt->maxatts;
9677 atts = ctxt->atts;
9678 }
9679 atts[nbatts++] = attname;
9680 atts[nbatts++] = aprefix;
9681 if (aprefix == NULL)
9682 atts[nbatts++] = NULL;
9683 else
9684 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009685 atts[nbatts++] = defaults->values[5 * i + 2];
9686 atts[nbatts++] = defaults->values[5 * i + 3];
9687 if ((ctxt->standalone == 1) &&
9688 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009689 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009690 "standalone: attribute %s on %s defaulted from external subset\n",
9691 attname, localname);
9692 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009693 nbdef++;
9694 }
9695 }
9696 }
9697 }
9698
Daniel Veillarde70c8772003-11-25 07:21:18 +00009699 /*
9700 * The attributes checkings
9701 */
9702 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009703 /*
9704 * The default namespace does not apply to attribute names.
9705 */
9706 if (atts[i + 1] != NULL) {
9707 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9708 if (nsname == NULL) {
9709 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9710 "Namespace prefix %s for %s on %s is not defined\n",
9711 atts[i + 1], atts[i], localname);
9712 }
9713 atts[i + 2] = nsname;
9714 } else
9715 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009716 /*
9717 * [ WFC: Unique Att Spec ]
9718 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009719 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009720 * As extended by the Namespace in XML REC.
9721 */
9722 for (j = 0; j < i;j += 5) {
9723 if (atts[i] == atts[j]) {
9724 if (atts[i+1] == atts[j+1]) {
9725 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9726 break;
9727 }
9728 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9729 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9730 "Namespaced Attribute %s in '%s' redefined\n",
9731 atts[i], nsname, NULL);
9732 break;
9733 }
9734 }
9735 }
9736 }
9737
Daniel Veillarde57ec792003-09-10 10:50:59 +00009738 nsname = xmlGetNamespace(ctxt, prefix);
9739 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009740 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9741 "Namespace prefix %s on %s is not defined\n",
9742 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009743 }
9744 *pref = prefix;
9745 *URI = nsname;
9746
9747 /*
9748 * SAX: Start of Element !
9749 */
9750 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9751 (!ctxt->disableSAX)) {
9752 if (nbNs > 0)
9753 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9754 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9755 nbatts / 5, nbdef, atts);
9756 else
9757 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9758 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9759 }
9760
9761 /*
9762 * Free up attribute allocated strings if needed
9763 */
9764 if (attval != 0) {
9765 for (i = 3,j = 0; j < nratts;i += 5,j++)
9766 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9767 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009768 }
9769
9770 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009771
9772base_changed:
9773 /*
9774 * the attribute strings are valid iif the base didn't changed
9775 */
9776 if (attval != 0) {
9777 for (i = 3,j = 0; j < nratts;i += 5,j++)
9778 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9779 xmlFree((xmlChar *) atts[i]);
9780 }
Daniel Veillardf1063fd2015-11-20 16:06:59 +08009781
9782 /*
9783 * We can't switch from one entity to another in the middle
9784 * of a start tag
9785 */
9786 if (inputNr != ctxt->inputNr) {
9787 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
9788 "Start tag doesn't start and stop in the same entity\n");
9789 return(NULL);
9790 }
9791
Daniel Veillarde57ec792003-09-10 10:50:59 +00009792 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009793 ctxt->input->line = oldline;
9794 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009795 if (ctxt->wellFormed == 1) {
9796 goto reparse;
9797 }
9798 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009799}
9800
9801/**
9802 * xmlParseEndTag2:
9803 * @ctxt: an XML parser context
9804 * @line: line of the start tag
9805 * @nsNr: number of namespaces on the start tag
9806 *
9807 * parse an end of tag
9808 *
9809 * [42] ETag ::= '</' Name S? '>'
9810 *
9811 * With namespace
9812 *
9813 * [NS 9] ETag ::= '</' QName S? '>'
9814 */
9815
9816static void
9817xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009818 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009819 const xmlChar *name;
9820
9821 GROW;
9822 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009823 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009824 return;
9825 }
9826 SKIP(2);
9827
William M. Brack13dfa872004-09-18 04:52:08 +00009828 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009829 if (ctxt->input->cur[tlen] == '>') {
9830 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009831 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009832 goto done;
9833 }
9834 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009835 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009836 name = (xmlChar*)1;
9837 } else {
9838 if (prefix == NULL)
9839 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9840 else
9841 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9842 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009843
9844 /*
9845 * We should definitely be at the ending "S? '>'" part
9846 */
9847 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009848 if (ctxt->instate == XML_PARSER_EOF)
9849 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009850 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009851 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009852 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009853 } else
9854 NEXT1;
9855
9856 /*
9857 * [ WFC: Element Type Match ]
9858 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009859 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009860 *
9861 */
9862 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009863 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009864 if ((line == 0) && (ctxt->node != NULL))
9865 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009866 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009867 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009868 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009869 }
9870
9871 /*
9872 * SAX: End of Tag
9873 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009874done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009875 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9876 (!ctxt->disableSAX))
9877 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9878
Daniel Veillard0fb18932003-09-07 09:14:37 +00009879 spacePop(ctxt);
9880 if (nsNr != 0)
9881 nsPop(ctxt, nsNr);
9882 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009883}
9884
9885/**
Owen Taylor3473f882001-02-23 17:55:21 +00009886 * xmlParseCDSect:
9887 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009888 *
Owen Taylor3473f882001-02-23 17:55:21 +00009889 * Parse escaped pure raw content.
9890 *
9891 * [18] CDSect ::= CDStart CData CDEnd
9892 *
9893 * [19] CDStart ::= '<![CDATA['
9894 *
9895 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9896 *
9897 * [21] CDEnd ::= ']]>'
9898 */
9899void
9900xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9901 xmlChar *buf = NULL;
9902 int len = 0;
9903 int size = XML_PARSER_BUFFER_SIZE;
9904 int r, rl;
9905 int s, sl;
9906 int cur, l;
9907 int count = 0;
9908
Daniel Veillard8f597c32003-10-06 08:19:27 +00009909 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009910 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009911 SKIP(9);
9912 } else
9913 return;
9914
9915 ctxt->instate = XML_PARSER_CDATA_SECTION;
9916 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009917 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009918 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009919 ctxt->instate = XML_PARSER_CONTENT;
9920 return;
9921 }
9922 NEXTL(rl);
9923 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009924 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009925 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009926 ctxt->instate = XML_PARSER_CONTENT;
9927 return;
9928 }
9929 NEXTL(sl);
9930 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009931 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009932 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009933 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009934 return;
9935 }
William M. Brack871611b2003-10-18 04:53:14 +00009936 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009937 ((r != ']') || (s != ']') || (cur != '>'))) {
9938 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009939 xmlChar *tmp;
9940
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009941 if ((size > XML_MAX_TEXT_LENGTH) &&
9942 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9943 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9944 "CData section too big found", NULL);
9945 xmlFree (buf);
9946 return;
9947 }
9948 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009949 if (tmp == NULL) {
9950 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009951 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009952 return;
9953 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009954 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009955 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009956 }
9957 COPY_BUF(rl,buf,len,r);
9958 r = s;
9959 rl = sl;
9960 s = cur;
9961 sl = l;
9962 count++;
9963 if (count > 50) {
9964 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009965 if (ctxt->instate == XML_PARSER_EOF) {
9966 xmlFree(buf);
9967 return;
9968 }
Owen Taylor3473f882001-02-23 17:55:21 +00009969 count = 0;
9970 }
9971 NEXTL(l);
9972 cur = CUR_CHAR(l);
9973 }
9974 buf[len] = 0;
9975 ctxt->instate = XML_PARSER_CONTENT;
9976 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009977 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009978 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009979 xmlFree(buf);
9980 return;
9981 }
9982 NEXTL(l);
9983
9984 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009985 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009986 */
9987 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9988 if (ctxt->sax->cdataBlock != NULL)
9989 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009990 else if (ctxt->sax->characters != NULL)
9991 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009992 }
9993 xmlFree(buf);
9994}
9995
9996/**
9997 * xmlParseContent:
9998 * @ctxt: an XML parser context
9999 *
10000 * Parse a content:
10001 *
10002 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10003 */
10004
10005void
10006xmlParseContent(xmlParserCtxtPtr ctxt) {
10007 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +000010008 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010009 ((RAW != '<') || (NXT(1) != '/')) &&
10010 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010011 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +000010012 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +000010013 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +000010014
10015 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010016 * First case : a Processing Instruction.
10017 */
Daniel Veillardfdc91562002-07-01 21:52:03 +000010018 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010019 xmlParsePI(ctxt);
10020 }
10021
10022 /*
10023 * Second case : a CDSection
10024 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010025 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010026 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010027 xmlParseCDSect(ctxt);
10028 }
10029
10030 /*
10031 * Third case : a comment
10032 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010033 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010034 (NXT(2) == '-') && (NXT(3) == '-')) {
10035 xmlParseComment(ctxt);
10036 ctxt->instate = XML_PARSER_CONTENT;
10037 }
10038
10039 /*
10040 * Fourth case : a sub-element.
10041 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010042 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +000010043 xmlParseElement(ctxt);
10044 }
10045
10046 /*
10047 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010048 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +000010049 */
10050
Daniel Veillard21a0f912001-02-25 19:54:14 +000010051 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +000010052 xmlParseReference(ctxt);
10053 }
10054
10055 /*
10056 * Last case, text. Note that References are handled directly.
10057 */
10058 else {
10059 xmlParseCharData(ctxt, 0);
10060 }
10061
10062 GROW;
10063 /*
10064 * Pop-up of finished entities.
10065 */
Daniel Veillard561b7f82002-03-20 21:55:57 +000010066 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +000010067 xmlPopInput(ctxt);
10068 SHRINK;
10069
Daniel Veillardfdc91562002-07-01 21:52:03 +000010070 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010071 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10072 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080010073 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010074 break;
10075 }
10076 }
10077}
10078
10079/**
10080 * xmlParseElement:
10081 * @ctxt: an XML parser context
10082 *
10083 * parse an XML element, this is highly recursive
10084 *
10085 * [39] element ::= EmptyElemTag | STag content ETag
10086 *
10087 * [ WFC: Element Type Match ]
10088 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010089 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +000010090 *
Owen Taylor3473f882001-02-23 17:55:21 +000010091 */
10092
10093void
10094xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010095 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010096 const xmlChar *prefix = NULL;
10097 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010098 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010099 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010100 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010101 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010102
Daniel Veillard8915c152008-08-26 13:05:34 +000010103 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10104 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10105 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10106 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10107 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010108 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010109 return;
10110 }
10111
Owen Taylor3473f882001-02-23 17:55:21 +000010112 /* Capture start position */
10113 if (ctxt->record_info) {
10114 node_info.begin_pos = ctxt->input->consumed +
10115 (CUR_PTR - ctxt->input->base);
10116 node_info.begin_line = ctxt->input->line;
10117 }
10118
10119 if (ctxt->spaceNr == 0)
10120 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010121 else if (*ctxt->space == -2)
10122 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010123 else
10124 spacePush(ctxt, *ctxt->space);
10125
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010126 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010127#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010128 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010129#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010130 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010131#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010132 else
10133 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010134#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010135 if (ctxt->instate == XML_PARSER_EOF)
10136 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010137 if (name == NULL) {
10138 spacePop(ctxt);
10139 return;
10140 }
10141 namePush(ctxt, name);
10142 ret = ctxt->node;
10143
Daniel Veillard4432df22003-09-28 18:58:27 +000010144#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010145 /*
10146 * [ VC: Root Element Type ]
10147 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010148 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010149 */
10150 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10151 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10152 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010153#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010154
10155 /*
10156 * Check for an Empty Element.
10157 */
10158 if ((RAW == '/') && (NXT(1) == '>')) {
10159 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010160 if (ctxt->sax2) {
10161 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10162 (!ctxt->disableSAX))
10163 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010164#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010165 } else {
10166 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10167 (!ctxt->disableSAX))
10168 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010169#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010170 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010171 namePop(ctxt);
10172 spacePop(ctxt);
10173 if (nsNr != ctxt->nsNr)
10174 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010175 if ( ret != NULL && ctxt->record_info ) {
10176 node_info.end_pos = ctxt->input->consumed +
10177 (CUR_PTR - ctxt->input->base);
10178 node_info.end_line = ctxt->input->line;
10179 node_info.node = ret;
10180 xmlParserAddNodeInfo(ctxt, &node_info);
10181 }
10182 return;
10183 }
10184 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010185 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010186 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010187 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10188 "Couldn't find end of Start Tag %s line %d\n",
10189 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010190
10191 /*
10192 * end of parsing of this node.
10193 */
10194 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010195 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010196 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010197 if (nsNr != ctxt->nsNr)
10198 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010199
10200 /*
10201 * Capture end position and add node
10202 */
10203 if ( ret != NULL && ctxt->record_info ) {
10204 node_info.end_pos = ctxt->input->consumed +
10205 (CUR_PTR - ctxt->input->base);
10206 node_info.end_line = ctxt->input->line;
10207 node_info.node = ret;
10208 xmlParserAddNodeInfo(ctxt, &node_info);
10209 }
10210 return;
10211 }
10212
10213 /*
10214 * Parse the content of the element:
10215 */
10216 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010217 if (ctxt->instate == XML_PARSER_EOF)
10218 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010219 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010220 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010221 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010222 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010223
10224 /*
10225 * end of parsing of this node.
10226 */
10227 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010228 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010229 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010230 if (nsNr != ctxt->nsNr)
10231 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010232 return;
10233 }
10234
10235 /*
10236 * parse the end of tag: '</' should be here.
10237 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010238 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010239 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010240 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010241 }
10242#ifdef LIBXML_SAX1_ENABLED
10243 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010244 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010245#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010246
10247 /*
10248 * Capture end position and add node
10249 */
10250 if ( ret != NULL && ctxt->record_info ) {
10251 node_info.end_pos = ctxt->input->consumed +
10252 (CUR_PTR - ctxt->input->base);
10253 node_info.end_line = ctxt->input->line;
10254 node_info.node = ret;
10255 xmlParserAddNodeInfo(ctxt, &node_info);
10256 }
10257}
10258
10259/**
10260 * xmlParseVersionNum:
10261 * @ctxt: an XML parser context
10262 *
10263 * parse the XML version value.
10264 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010265 * [26] VersionNum ::= '1.' [0-9]+
10266 *
10267 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010268 *
10269 * Returns the string giving the XML version number, or NULL
10270 */
10271xmlChar *
10272xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10273 xmlChar *buf = NULL;
10274 int len = 0;
10275 int size = 10;
10276 xmlChar cur;
10277
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010278 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010279 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010280 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010281 return(NULL);
10282 }
10283 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010284 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010285 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010286 return(NULL);
10287 }
10288 buf[len++] = cur;
10289 NEXT;
10290 cur=CUR;
10291 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010292 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010293 return(NULL);
10294 }
10295 buf[len++] = cur;
10296 NEXT;
10297 cur=CUR;
10298 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010299 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010300 xmlChar *tmp;
10301
Owen Taylor3473f882001-02-23 17:55:21 +000010302 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010303 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10304 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010305 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010306 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010307 return(NULL);
10308 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010309 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010310 }
10311 buf[len++] = cur;
10312 NEXT;
10313 cur=CUR;
10314 }
10315 buf[len] = 0;
10316 return(buf);
10317}
10318
10319/**
10320 * xmlParseVersionInfo:
10321 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010322 *
Owen Taylor3473f882001-02-23 17:55:21 +000010323 * parse the XML version.
10324 *
10325 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010326 *
Owen Taylor3473f882001-02-23 17:55:21 +000010327 * [25] Eq ::= S? '=' S?
10328 *
10329 * Returns the version string, e.g. "1.0"
10330 */
10331
10332xmlChar *
10333xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10334 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010335
Daniel Veillarda07050d2003-10-19 14:46:32 +000010336 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010337 SKIP(7);
10338 SKIP_BLANKS;
10339 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010340 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010341 return(NULL);
10342 }
10343 NEXT;
10344 SKIP_BLANKS;
10345 if (RAW == '"') {
10346 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010347 version = xmlParseVersionNum(ctxt);
10348 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010349 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010350 } else
10351 NEXT;
10352 } else if (RAW == '\''){
10353 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010354 version = xmlParseVersionNum(ctxt);
10355 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010356 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010357 } else
10358 NEXT;
10359 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010360 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010361 }
10362 }
10363 return(version);
10364}
10365
10366/**
10367 * xmlParseEncName:
10368 * @ctxt: an XML parser context
10369 *
10370 * parse the XML encoding name
10371 *
10372 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10373 *
10374 * Returns the encoding name value or NULL
10375 */
10376xmlChar *
10377xmlParseEncName(xmlParserCtxtPtr ctxt) {
10378 xmlChar *buf = NULL;
10379 int len = 0;
10380 int size = 10;
10381 xmlChar cur;
10382
10383 cur = CUR;
10384 if (((cur >= 'a') && (cur <= 'z')) ||
10385 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010386 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010387 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010388 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010389 return(NULL);
10390 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010391
Owen Taylor3473f882001-02-23 17:55:21 +000010392 buf[len++] = cur;
10393 NEXT;
10394 cur = CUR;
10395 while (((cur >= 'a') && (cur <= 'z')) ||
10396 ((cur >= 'A') && (cur <= 'Z')) ||
10397 ((cur >= '0') && (cur <= '9')) ||
10398 (cur == '.') || (cur == '_') ||
10399 (cur == '-')) {
10400 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010401 xmlChar *tmp;
10402
Owen Taylor3473f882001-02-23 17:55:21 +000010403 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010404 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10405 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010406 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010407 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010408 return(NULL);
10409 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010410 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010411 }
10412 buf[len++] = cur;
10413 NEXT;
10414 cur = CUR;
10415 if (cur == 0) {
10416 SHRINK;
10417 GROW;
10418 cur = CUR;
10419 }
10420 }
10421 buf[len] = 0;
10422 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010423 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010424 }
10425 return(buf);
10426}
10427
10428/**
10429 * xmlParseEncodingDecl:
10430 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010431 *
Owen Taylor3473f882001-02-23 17:55:21 +000010432 * parse the XML encoding declaration
10433 *
10434 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10435 *
10436 * this setups the conversion filters.
10437 *
10438 * Returns the encoding value or NULL
10439 */
10440
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010441const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010442xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10443 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010444
10445 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010446 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010447 SKIP(8);
10448 SKIP_BLANKS;
10449 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010450 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010451 return(NULL);
10452 }
10453 NEXT;
10454 SKIP_BLANKS;
10455 if (RAW == '"') {
10456 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010457 encoding = xmlParseEncName(ctxt);
10458 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010459 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010460 xmlFree((xmlChar *) encoding);
10461 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010462 } else
10463 NEXT;
10464 } else if (RAW == '\''){
10465 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010466 encoding = xmlParseEncName(ctxt);
10467 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010468 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010469 xmlFree((xmlChar *) encoding);
10470 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010471 } else
10472 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010473 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010474 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010476
10477 /*
10478 * Non standard parsing, allowing the user to ignore encoding
10479 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010480 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10481 xmlFree((xmlChar *) encoding);
10482 return(NULL);
10483 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010484
Daniel Veillard6b621b82003-08-11 15:03:34 +000010485 /*
10486 * UTF-16 encoding stwich has already taken place at this stage,
10487 * more over the little-endian/big-endian selection is already done
10488 */
10489 if ((encoding != NULL) &&
10490 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10491 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010492 /*
10493 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010494 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010495 * document is apparently UTF-8 compatible, then raise an
10496 * encoding mismatch fatal error
10497 */
10498 if ((ctxt->encoding == NULL) &&
10499 (ctxt->input->buf != NULL) &&
10500 (ctxt->input->buf->encoder == NULL)) {
10501 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10502 "Document labelled UTF-16 but has UTF-8 content\n");
10503 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010504 if (ctxt->encoding != NULL)
10505 xmlFree((xmlChar *) ctxt->encoding);
10506 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010507 }
10508 /*
10509 * UTF-8 encoding is handled natively
10510 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010511 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010512 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10513 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010514 if (ctxt->encoding != NULL)
10515 xmlFree((xmlChar *) ctxt->encoding);
10516 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010517 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010518 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010519 xmlCharEncodingHandlerPtr handler;
10520
10521 if (ctxt->input->encoding != NULL)
10522 xmlFree((xmlChar *) ctxt->input->encoding);
10523 ctxt->input->encoding = encoding;
10524
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010525 handler = xmlFindCharEncodingHandler((const char *) encoding);
10526 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010527 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10528 /* failed to convert */
10529 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10530 return(NULL);
10531 }
Owen Taylor3473f882001-02-23 17:55:21 +000010532 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010533 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010534 "Unsupported encoding %s\n", encoding);
10535 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010536 }
10537 }
10538 }
10539 return(encoding);
10540}
10541
10542/**
10543 * xmlParseSDDecl:
10544 * @ctxt: an XML parser context
10545 *
10546 * parse the XML standalone declaration
10547 *
10548 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010549 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010550 *
10551 * [ VC: Standalone Document Declaration ]
10552 * TODO The standalone document declaration must have the value "no"
10553 * if any external markup declarations contain declarations of:
10554 * - attributes with default values, if elements to which these
10555 * attributes apply appear in the document without specifications
10556 * of values for these attributes, or
10557 * - entities (other than amp, lt, gt, apos, quot), if references
10558 * to those entities appear in the document, or
10559 * - attributes with values subject to normalization, where the
10560 * attribute appears in the document with a value which will change
10561 * as a result of normalization, or
10562 * - element types with element content, if white space occurs directly
10563 * within any instance of those types.
10564 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010565 * Returns:
10566 * 1 if standalone="yes"
10567 * 0 if standalone="no"
10568 * -2 if standalone attribute is missing or invalid
10569 * (A standalone value of -2 means that the XML declaration was found,
10570 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010571 */
10572
10573int
10574xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010575 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010576
10577 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010578 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010579 SKIP(10);
10580 SKIP_BLANKS;
10581 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010582 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010583 return(standalone);
10584 }
10585 NEXT;
10586 SKIP_BLANKS;
10587 if (RAW == '\''){
10588 NEXT;
10589 if ((RAW == 'n') && (NXT(1) == 'o')) {
10590 standalone = 0;
10591 SKIP(2);
10592 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10593 (NXT(2) == 's')) {
10594 standalone = 1;
10595 SKIP(3);
10596 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010597 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010598 }
10599 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010600 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010601 } else
10602 NEXT;
10603 } else if (RAW == '"'){
10604 NEXT;
10605 if ((RAW == 'n') && (NXT(1) == 'o')) {
10606 standalone = 0;
10607 SKIP(2);
10608 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10609 (NXT(2) == 's')) {
10610 standalone = 1;
10611 SKIP(3);
10612 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010613 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010614 }
10615 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010616 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010617 } else
10618 NEXT;
10619 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010620 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010621 }
10622 }
10623 return(standalone);
10624}
10625
10626/**
10627 * xmlParseXMLDecl:
10628 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010629 *
Owen Taylor3473f882001-02-23 17:55:21 +000010630 * parse an XML declaration header
10631 *
10632 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10633 */
10634
10635void
10636xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10637 xmlChar *version;
10638
10639 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010640 * This value for standalone indicates that the document has an
10641 * XML declaration but it does not have a standalone attribute.
10642 * It will be overwritten later if a standalone attribute is found.
10643 */
10644 ctxt->input->standalone = -2;
10645
10646 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010647 * We know that '<?xml' is here.
10648 */
10649 SKIP(5);
10650
William M. Brack76e95df2003-10-18 16:20:14 +000010651 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010652 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10653 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010654 }
10655 SKIP_BLANKS;
10656
10657 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010658 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010659 */
10660 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010661 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010662 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010663 } else {
10664 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10665 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010666 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010667 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010668 if (ctxt->options & XML_PARSE_OLD10) {
10669 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10670 "Unsupported version '%s'\n",
10671 version);
10672 } else {
10673 if ((version[0] == '1') && ((version[1] == '.'))) {
10674 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10675 "Unsupported version '%s'\n",
10676 version, NULL);
10677 } else {
10678 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10679 "Unsupported version '%s'\n",
10680 version);
10681 }
10682 }
Daniel Veillard19840942001-11-29 16:11:38 +000010683 }
10684 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010685 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010686 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010687 }
Owen Taylor3473f882001-02-23 17:55:21 +000010688
10689 /*
10690 * We may have the encoding declaration
10691 */
William M. Brack76e95df2003-10-18 16:20:14 +000010692 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010693 if ((RAW == '?') && (NXT(1) == '>')) {
10694 SKIP(2);
10695 return;
10696 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010697 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010698 }
10699 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010700 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10701 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010702 /*
10703 * The XML REC instructs us to stop parsing right here
10704 */
10705 return;
10706 }
10707
10708 /*
10709 * We may have the standalone status.
10710 */
William M. Brack76e95df2003-10-18 16:20:14 +000010711 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010712 if ((RAW == '?') && (NXT(1) == '>')) {
10713 SKIP(2);
10714 return;
10715 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010717 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010718
10719 /*
10720 * We can grow the input buffer freely at that point
10721 */
10722 GROW;
10723
Owen Taylor3473f882001-02-23 17:55:21 +000010724 SKIP_BLANKS;
10725 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10726
10727 SKIP_BLANKS;
10728 if ((RAW == '?') && (NXT(1) == '>')) {
10729 SKIP(2);
10730 } else if (RAW == '>') {
10731 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010732 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010733 NEXT;
10734 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010735 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010736 MOVETO_ENDTAG(CUR_PTR);
10737 NEXT;
10738 }
10739}
10740
10741/**
10742 * xmlParseMisc:
10743 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010744 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010745 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010746 *
10747 * [27] Misc ::= Comment | PI | S
10748 */
10749
10750void
10751xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010752 while ((ctxt->instate != XML_PARSER_EOF) &&
10753 (((RAW == '<') && (NXT(1) == '?')) ||
10754 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10755 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010756 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010757 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010758 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010759 NEXT;
10760 } else
10761 xmlParseComment(ctxt);
10762 }
10763}
10764
10765/**
10766 * xmlParseDocument:
10767 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010768 *
Owen Taylor3473f882001-02-23 17:55:21 +000010769 * parse an XML document (and build a tree if using the standard SAX
10770 * interface).
10771 *
10772 * [1] document ::= prolog element Misc*
10773 *
10774 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10775 *
10776 * Returns 0, -1 in case of error. the parser context is augmented
10777 * as a result of the parsing.
10778 */
10779
10780int
10781xmlParseDocument(xmlParserCtxtPtr ctxt) {
10782 xmlChar start[4];
10783 xmlCharEncoding enc;
10784
10785 xmlInitParser();
10786
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010787 if ((ctxt == NULL) || (ctxt->input == NULL))
10788 return(-1);
10789
Owen Taylor3473f882001-02-23 17:55:21 +000010790 GROW;
10791
10792 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010793 * SAX: detecting the level.
10794 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010795 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010796
10797 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010798 * SAX: beginning of the document processing.
10799 */
10800 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10801 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010802 if (ctxt->instate == XML_PARSER_EOF)
10803 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010804
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010805 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010806 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010807 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010808 * Get the 4 first bytes and decode the charset
10809 * if enc != XML_CHAR_ENCODING_NONE
10810 * plug some encoding conversion routines.
10811 */
10812 start[0] = RAW;
10813 start[1] = NXT(1);
10814 start[2] = NXT(2);
10815 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010816 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010817 if (enc != XML_CHAR_ENCODING_NONE) {
10818 xmlSwitchEncoding(ctxt, enc);
10819 }
Owen Taylor3473f882001-02-23 17:55:21 +000010820 }
10821
10822
10823 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010824 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010825 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010826 }
10827
10828 /*
10829 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010830 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010831 * than just the first line, unless the amount of data is really
10832 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010833 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010834 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10835 GROW;
10836 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010837 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010838
10839 /*
10840 * Note that we will switch encoding on the fly.
10841 */
10842 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010843 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10844 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010845 /*
10846 * The XML REC instructs us to stop parsing right here
10847 */
10848 return(-1);
10849 }
10850 ctxt->standalone = ctxt->input->standalone;
10851 SKIP_BLANKS;
10852 } else {
10853 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10854 }
10855 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10856 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010857 if (ctxt->instate == XML_PARSER_EOF)
10858 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010859 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10860 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10861 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10862 }
Owen Taylor3473f882001-02-23 17:55:21 +000010863
10864 /*
10865 * The Misc part of the Prolog
10866 */
10867 GROW;
10868 xmlParseMisc(ctxt);
10869
10870 /*
10871 * Then possibly doc type declaration(s) and more Misc
10872 * (doctypedecl Misc*)?
10873 */
10874 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010875 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010876
10877 ctxt->inSubset = 1;
10878 xmlParseDocTypeDecl(ctxt);
10879 if (RAW == '[') {
10880 ctxt->instate = XML_PARSER_DTD;
10881 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010882 if (ctxt->instate == XML_PARSER_EOF)
10883 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010884 }
10885
10886 /*
10887 * Create and update the external subset.
10888 */
10889 ctxt->inSubset = 2;
10890 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10891 (!ctxt->disableSAX))
10892 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10893 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010894 if (ctxt->instate == XML_PARSER_EOF)
10895 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010896 ctxt->inSubset = 0;
10897
Daniel Veillardac4118d2008-01-11 05:27:32 +000010898 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010899
10900 ctxt->instate = XML_PARSER_PROLOG;
10901 xmlParseMisc(ctxt);
10902 }
10903
10904 /*
10905 * Time to start parsing the tree itself
10906 */
10907 GROW;
10908 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010909 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10910 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010911 } else {
10912 ctxt->instate = XML_PARSER_CONTENT;
10913 xmlParseElement(ctxt);
10914 ctxt->instate = XML_PARSER_EPILOG;
10915
10916
10917 /*
10918 * The Misc part at the end
10919 */
10920 xmlParseMisc(ctxt);
10921
Daniel Veillard561b7f82002-03-20 21:55:57 +000010922 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010923 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010924 }
10925 ctxt->instate = XML_PARSER_EOF;
10926 }
10927
10928 /*
10929 * SAX: end of the document processing.
10930 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010931 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010932 ctxt->sax->endDocument(ctxt->userData);
10933
Daniel Veillard5997aca2002-03-18 18:36:20 +000010934 /*
10935 * Remove locally kept entity definitions if the tree was not built
10936 */
10937 if ((ctxt->myDoc != NULL) &&
10938 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10939 xmlFreeDoc(ctxt->myDoc);
10940 ctxt->myDoc = NULL;
10941 }
10942
Daniel Veillardae0765b2008-07-31 19:54:59 +000010943 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10944 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10945 if (ctxt->valid)
10946 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10947 if (ctxt->nsWellFormed)
10948 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10949 if (ctxt->options & XML_PARSE_OLD10)
10950 ctxt->myDoc->properties |= XML_DOC_OLD10;
10951 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010952 if (! ctxt->wellFormed) {
10953 ctxt->valid = 0;
10954 return(-1);
10955 }
Owen Taylor3473f882001-02-23 17:55:21 +000010956 return(0);
10957}
10958
10959/**
10960 * xmlParseExtParsedEnt:
10961 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010962 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010963 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010964 * An external general parsed entity is well-formed if it matches the
10965 * production labeled extParsedEnt.
10966 *
10967 * [78] extParsedEnt ::= TextDecl? content
10968 *
10969 * Returns 0, -1 in case of error. the parser context is augmented
10970 * as a result of the parsing.
10971 */
10972
10973int
10974xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10975 xmlChar start[4];
10976 xmlCharEncoding enc;
10977
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010978 if ((ctxt == NULL) || (ctxt->input == NULL))
10979 return(-1);
10980
Owen Taylor3473f882001-02-23 17:55:21 +000010981 xmlDefaultSAXHandlerInit();
10982
Daniel Veillard309f81d2003-09-23 09:02:53 +000010983 xmlDetectSAX2(ctxt);
10984
Owen Taylor3473f882001-02-23 17:55:21 +000010985 GROW;
10986
10987 /*
10988 * SAX: beginning of the document processing.
10989 */
10990 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10991 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10992
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010993 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010994 * Get the 4 first bytes and decode the charset
10995 * if enc != XML_CHAR_ENCODING_NONE
10996 * plug some encoding conversion routines.
10997 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010998 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10999 start[0] = RAW;
11000 start[1] = NXT(1);
11001 start[2] = NXT(2);
11002 start[3] = NXT(3);
11003 enc = xmlDetectCharEncoding(start, 4);
11004 if (enc != XML_CHAR_ENCODING_NONE) {
11005 xmlSwitchEncoding(ctxt, enc);
11006 }
Owen Taylor3473f882001-02-23 17:55:21 +000011007 }
11008
11009
11010 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011011 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011012 }
11013
11014 /*
11015 * Check for the XMLDecl in the Prolog.
11016 */
11017 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000011018 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011019
11020 /*
11021 * Note that we will switch encoding on the fly.
11022 */
11023 xmlParseXMLDecl(ctxt);
11024 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11025 /*
11026 * The XML REC instructs us to stop parsing right here
11027 */
11028 return(-1);
11029 }
11030 SKIP_BLANKS;
11031 } else {
11032 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11033 }
11034 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11035 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011036 if (ctxt->instate == XML_PARSER_EOF)
11037 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000011038
11039 /*
11040 * Doing validity checking on chunk doesn't make sense
11041 */
11042 ctxt->instate = XML_PARSER_CONTENT;
11043 ctxt->validate = 0;
11044 ctxt->loadsubset = 0;
11045 ctxt->depth = 0;
11046
11047 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011048 if (ctxt->instate == XML_PARSER_EOF)
11049 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011050
Owen Taylor3473f882001-02-23 17:55:21 +000011051 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011052 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011053 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011054 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011055 }
11056
11057 /*
11058 * SAX: end of the document processing.
11059 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011060 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011061 ctxt->sax->endDocument(ctxt->userData);
11062
11063 if (! ctxt->wellFormed) return(-1);
11064 return(0);
11065}
11066
Daniel Veillard73b013f2003-09-30 12:36:01 +000011067#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011068/************************************************************************
11069 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011070 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000011071 * *
11072 ************************************************************************/
11073
11074/**
11075 * xmlParseLookupSequence:
11076 * @ctxt: an XML parser context
11077 * @first: the first char to lookup
11078 * @next: the next char to lookup or zero
11079 * @third: the next char to lookup or zero
11080 *
11081 * Try to find if a sequence (first, next, third) or just (first next) or
11082 * (first) is available in the input stream.
11083 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11084 * to avoid rescanning sequences of bytes, it DOES change the state of the
11085 * parser, do not use liberally.
11086 *
11087 * Returns the index to the current parsing point if the full sequence
11088 * is available, -1 otherwise.
11089 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011090static int
Owen Taylor3473f882001-02-23 17:55:21 +000011091xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11092 xmlChar next, xmlChar third) {
11093 int base, len;
11094 xmlParserInputPtr in;
11095 const xmlChar *buf;
11096
11097 in = ctxt->input;
11098 if (in == NULL) return(-1);
11099 base = in->cur - in->base;
11100 if (base < 0) return(-1);
11101 if (ctxt->checkIndex > base)
11102 base = ctxt->checkIndex;
11103 if (in->buf == NULL) {
11104 buf = in->base;
11105 len = in->length;
11106 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011107 buf = xmlBufContent(in->buf->buffer);
11108 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011109 }
11110 /* take into account the sequence length */
11111 if (third) len -= 2;
11112 else if (next) len --;
11113 for (;base < len;base++) {
11114 if (buf[base] == first) {
11115 if (third != 0) {
11116 if ((buf[base + 1] != next) ||
11117 (buf[base + 2] != third)) continue;
11118 } else if (next != 0) {
11119 if (buf[base + 1] != next) continue;
11120 }
11121 ctxt->checkIndex = 0;
11122#ifdef DEBUG_PUSH
11123 if (next == 0)
11124 xmlGenericError(xmlGenericErrorContext,
11125 "PP: lookup '%c' found at %d\n",
11126 first, base);
11127 else if (third == 0)
11128 xmlGenericError(xmlGenericErrorContext,
11129 "PP: lookup '%c%c' found at %d\n",
11130 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011131 else
Owen Taylor3473f882001-02-23 17:55:21 +000011132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: lookup '%c%c%c' found at %d\n",
11134 first, next, third, base);
11135#endif
11136 return(base - (in->cur - in->base));
11137 }
11138 }
11139 ctxt->checkIndex = base;
11140#ifdef DEBUG_PUSH
11141 if (next == 0)
11142 xmlGenericError(xmlGenericErrorContext,
11143 "PP: lookup '%c' failed\n", first);
11144 else if (third == 0)
11145 xmlGenericError(xmlGenericErrorContext,
11146 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011147 else
Owen Taylor3473f882001-02-23 17:55:21 +000011148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: lookup '%c%c%c' failed\n", first, next, third);
11150#endif
11151 return(-1);
11152}
11153
11154/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011155 * xmlParseGetLasts:
11156 * @ctxt: an XML parser context
11157 * @lastlt: pointer to store the last '<' from the input
11158 * @lastgt: pointer to store the last '>' from the input
11159 *
11160 * Lookup the last < and > in the current chunk
11161 */
11162static void
11163xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11164 const xmlChar **lastgt) {
11165 const xmlChar *tmp;
11166
11167 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11168 xmlGenericError(xmlGenericErrorContext,
11169 "Internal error: xmlParseGetLasts\n");
11170 return;
11171 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011172 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011173 tmp = ctxt->input->end;
11174 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011175 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011176 if (tmp < ctxt->input->base) {
11177 *lastlt = NULL;
11178 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011179 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011180 *lastlt = tmp;
11181 tmp++;
11182 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11183 if (*tmp == '\'') {
11184 tmp++;
11185 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11186 if (tmp < ctxt->input->end) tmp++;
11187 } else if (*tmp == '"') {
11188 tmp++;
11189 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11190 if (tmp < ctxt->input->end) tmp++;
11191 } else
11192 tmp++;
11193 }
11194 if (tmp < ctxt->input->end)
11195 *lastgt = tmp;
11196 else {
11197 tmp = *lastlt;
11198 tmp--;
11199 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11200 if (tmp >= ctxt->input->base)
11201 *lastgt = tmp;
11202 else
11203 *lastgt = NULL;
11204 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011205 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011206 } else {
11207 *lastlt = NULL;
11208 *lastgt = NULL;
11209 }
11210}
11211/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011212 * xmlCheckCdataPush:
11213 * @cur: pointer to the bock of characters
11214 * @len: length of the block in bytes
11215 *
11216 * Check that the block of characters is okay as SCdata content [20]
11217 *
11218 * Returns the number of bytes to pass if okay, a negative index where an
11219 * UTF-8 error occured otherwise
11220 */
11221static int
11222xmlCheckCdataPush(const xmlChar *utf, int len) {
11223 int ix;
11224 unsigned char c;
11225 int codepoint;
11226
11227 if ((utf == NULL) || (len <= 0))
11228 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011229
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011230 for (ix = 0; ix < len;) { /* string is 0-terminated */
11231 c = utf[ix];
11232 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11233 if (c >= 0x20)
11234 ix++;
11235 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11236 ix++;
11237 else
11238 return(-ix);
11239 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
Daniel Veillard4a5d80a2015-09-18 15:06:46 +080011240 if (ix + 2 > len) return(-ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011241 if ((utf[ix+1] & 0xc0 ) != 0x80)
11242 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011243 codepoint = (utf[ix] & 0x1f) << 6;
11244 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011245 if (!xmlIsCharQ(codepoint))
11246 return(-ix);
11247 ix += 2;
11248 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
Daniel Veillard4a5d80a2015-09-18 15:06:46 +080011249 if (ix + 3 > len) return(-ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011250 if (((utf[ix+1] & 0xc0) != 0x80) ||
11251 ((utf[ix+2] & 0xc0) != 0x80))
11252 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011253 codepoint = (utf[ix] & 0xf) << 12;
11254 codepoint |= (utf[ix+1] & 0x3f) << 6;
11255 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011256 if (!xmlIsCharQ(codepoint))
11257 return(-ix);
11258 ix += 3;
11259 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
Daniel Veillard4a5d80a2015-09-18 15:06:46 +080011260 if (ix + 4 > len) return(-ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011261 if (((utf[ix+1] & 0xc0) != 0x80) ||
11262 ((utf[ix+2] & 0xc0) != 0x80) ||
11263 ((utf[ix+3] & 0xc0) != 0x80))
11264 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011265 codepoint = (utf[ix] & 0x7) << 18;
11266 codepoint |= (utf[ix+1] & 0x3f) << 12;
11267 codepoint |= (utf[ix+2] & 0x3f) << 6;
11268 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011269 if (!xmlIsCharQ(codepoint))
11270 return(-ix);
11271 ix += 4;
11272 } else /* unknown encoding */
11273 return(-ix);
11274 }
11275 return(ix);
11276}
11277
11278/**
Owen Taylor3473f882001-02-23 17:55:21 +000011279 * xmlParseTryOrFinish:
11280 * @ctxt: an XML parser context
11281 * @terminate: last chunk indicator
11282 *
11283 * Try to progress on parsing
11284 *
11285 * Returns zero if no parsing was possible
11286 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011287static int
Owen Taylor3473f882001-02-23 17:55:21 +000011288xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11289 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011290 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011291 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011292 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011293
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011294 if (ctxt->input == NULL)
11295 return(0);
11296
Owen Taylor3473f882001-02-23 17:55:21 +000011297#ifdef DEBUG_PUSH
11298 switch (ctxt->instate) {
11299 case XML_PARSER_EOF:
11300 xmlGenericError(xmlGenericErrorContext,
11301 "PP: try EOF\n"); break;
11302 case XML_PARSER_START:
11303 xmlGenericError(xmlGenericErrorContext,
11304 "PP: try START\n"); break;
11305 case XML_PARSER_MISC:
11306 xmlGenericError(xmlGenericErrorContext,
11307 "PP: try MISC\n");break;
11308 case XML_PARSER_COMMENT:
11309 xmlGenericError(xmlGenericErrorContext,
11310 "PP: try COMMENT\n");break;
11311 case XML_PARSER_PROLOG:
11312 xmlGenericError(xmlGenericErrorContext,
11313 "PP: try PROLOG\n");break;
11314 case XML_PARSER_START_TAG:
11315 xmlGenericError(xmlGenericErrorContext,
11316 "PP: try START_TAG\n");break;
11317 case XML_PARSER_CONTENT:
11318 xmlGenericError(xmlGenericErrorContext,
11319 "PP: try CONTENT\n");break;
11320 case XML_PARSER_CDATA_SECTION:
11321 xmlGenericError(xmlGenericErrorContext,
11322 "PP: try CDATA_SECTION\n");break;
11323 case XML_PARSER_END_TAG:
11324 xmlGenericError(xmlGenericErrorContext,
11325 "PP: try END_TAG\n");break;
11326 case XML_PARSER_ENTITY_DECL:
11327 xmlGenericError(xmlGenericErrorContext,
11328 "PP: try ENTITY_DECL\n");break;
11329 case XML_PARSER_ENTITY_VALUE:
11330 xmlGenericError(xmlGenericErrorContext,
11331 "PP: try ENTITY_VALUE\n");break;
11332 case XML_PARSER_ATTRIBUTE_VALUE:
11333 xmlGenericError(xmlGenericErrorContext,
11334 "PP: try ATTRIBUTE_VALUE\n");break;
11335 case XML_PARSER_DTD:
11336 xmlGenericError(xmlGenericErrorContext,
11337 "PP: try DTD\n");break;
11338 case XML_PARSER_EPILOG:
11339 xmlGenericError(xmlGenericErrorContext,
11340 "PP: try EPILOG\n");break;
11341 case XML_PARSER_PI:
11342 xmlGenericError(xmlGenericErrorContext,
11343 "PP: try PI\n");break;
11344 case XML_PARSER_IGNORE:
11345 xmlGenericError(xmlGenericErrorContext,
11346 "PP: try IGNORE\n");break;
11347 }
11348#endif
11349
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011350 if ((ctxt->input != NULL) &&
11351 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011352 xmlSHRINK(ctxt);
11353 ctxt->checkIndex = 0;
11354 }
11355 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011356
Daniel Veillarde50ba812013-04-11 15:54:51 +080011357 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011358 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011359 return(0);
11360
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011361
Owen Taylor3473f882001-02-23 17:55:21 +000011362 /*
11363 * Pop-up of finished entities.
11364 */
11365 while ((RAW == 0) && (ctxt->inputNr > 1))
11366 xmlPopInput(ctxt);
11367
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011368 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011369 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011370 avail = ctxt->input->length -
11371 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011372 else {
11373 /*
11374 * If we are operating on converted input, try to flush
11375 * remaining chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011376 * buffer. But do not do this in document start where
11377 * encoding="..." may not have been read and we work on a
11378 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011379 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011380 if ((ctxt->instate != XML_PARSER_START) &&
11381 (ctxt->input->buf->raw != NULL) &&
11382 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011383 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11384 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011385 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011386
11387 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011388 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11389 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011390 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011391 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011392 (ctxt->input->cur - ctxt->input->base);
11393 }
Owen Taylor3473f882001-02-23 17:55:21 +000011394 if (avail < 1)
11395 goto done;
11396 switch (ctxt->instate) {
11397 case XML_PARSER_EOF:
11398 /*
11399 * Document parsing is done !
11400 */
11401 goto done;
11402 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011403 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11404 xmlChar start[4];
11405 xmlCharEncoding enc;
11406
11407 /*
11408 * Very first chars read from the document flow.
11409 */
11410 if (avail < 4)
11411 goto done;
11412
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011413 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011414 * Get the 4 first bytes and decode the charset
11415 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011416 * plug some encoding conversion routines,
11417 * else xmlSwitchEncoding will set to (default)
11418 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011419 */
11420 start[0] = RAW;
11421 start[1] = NXT(1);
11422 start[2] = NXT(2);
11423 start[3] = NXT(3);
11424 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011425 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011426 break;
11427 }
Owen Taylor3473f882001-02-23 17:55:21 +000011428
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011429 if (avail < 2)
11430 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011431 cur = ctxt->input->cur[0];
11432 next = ctxt->input->cur[1];
11433 if (cur == 0) {
11434 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11435 ctxt->sax->setDocumentLocator(ctxt->userData,
11436 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011437 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011438 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011439#ifdef DEBUG_PUSH
11440 xmlGenericError(xmlGenericErrorContext,
11441 "PP: entering EOF\n");
11442#endif
11443 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11444 ctxt->sax->endDocument(ctxt->userData);
11445 goto done;
11446 }
11447 if ((cur == '<') && (next == '?')) {
11448 /* PI or XML decl */
11449 if (avail < 5) return(ret);
11450 if ((!terminate) &&
11451 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11452 return(ret);
11453 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11454 ctxt->sax->setDocumentLocator(ctxt->userData,
11455 &xmlDefaultSAXLocator);
11456 if ((ctxt->input->cur[2] == 'x') &&
11457 (ctxt->input->cur[3] == 'm') &&
11458 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011459 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011460 ret += 5;
11461#ifdef DEBUG_PUSH
11462 xmlGenericError(xmlGenericErrorContext,
11463 "PP: Parsing XML Decl\n");
11464#endif
11465 xmlParseXMLDecl(ctxt);
11466 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11467 /*
11468 * The XML REC instructs us to stop parsing right
11469 * here
11470 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011471 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011472 return(0);
11473 }
11474 ctxt->standalone = ctxt->input->standalone;
11475 if ((ctxt->encoding == NULL) &&
11476 (ctxt->input->encoding != NULL))
11477 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11478 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11479 (!ctxt->disableSAX))
11480 ctxt->sax->startDocument(ctxt->userData);
11481 ctxt->instate = XML_PARSER_MISC;
11482#ifdef DEBUG_PUSH
11483 xmlGenericError(xmlGenericErrorContext,
11484 "PP: entering MISC\n");
11485#endif
11486 } else {
11487 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11488 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11489 (!ctxt->disableSAX))
11490 ctxt->sax->startDocument(ctxt->userData);
11491 ctxt->instate = XML_PARSER_MISC;
11492#ifdef DEBUG_PUSH
11493 xmlGenericError(xmlGenericErrorContext,
11494 "PP: entering MISC\n");
11495#endif
11496 }
11497 } else {
11498 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11499 ctxt->sax->setDocumentLocator(ctxt->userData,
11500 &xmlDefaultSAXLocator);
11501 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011502 if (ctxt->version == NULL) {
11503 xmlErrMemory(ctxt, NULL);
11504 break;
11505 }
Owen Taylor3473f882001-02-23 17:55:21 +000011506 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11507 (!ctxt->disableSAX))
11508 ctxt->sax->startDocument(ctxt->userData);
11509 ctxt->instate = XML_PARSER_MISC;
11510#ifdef DEBUG_PUSH
11511 xmlGenericError(xmlGenericErrorContext,
11512 "PP: entering MISC\n");
11513#endif
11514 }
11515 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011516 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011517 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011518 const xmlChar *prefix = NULL;
11519 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011520 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011521
11522 if ((avail < 2) && (ctxt->inputNr == 1))
11523 goto done;
11524 cur = ctxt->input->cur[0];
11525 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011526 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011527 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011528 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11529 ctxt->sax->endDocument(ctxt->userData);
11530 goto done;
11531 }
11532 if (!terminate) {
11533 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011534 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011535 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011536 goto done;
11537 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11538 goto done;
11539 }
11540 }
11541 if (ctxt->spaceNr == 0)
11542 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011543 else if (*ctxt->space == -2)
11544 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011545 else
11546 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011547#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011548 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011549#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011550 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011551#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011552 else
11553 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011554#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011555 if (ctxt->instate == XML_PARSER_EOF)
11556 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011557 if (name == NULL) {
11558 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011559 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011560 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11561 ctxt->sax->endDocument(ctxt->userData);
11562 goto done;
11563 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011564#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011565 /*
11566 * [ VC: Root Element Type ]
11567 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011568 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011569 */
11570 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11571 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11572 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011573#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011574
11575 /*
11576 * Check for an Empty Element.
11577 */
11578 if ((RAW == '/') && (NXT(1) == '>')) {
11579 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011580
11581 if (ctxt->sax2) {
11582 if ((ctxt->sax != NULL) &&
11583 (ctxt->sax->endElementNs != NULL) &&
11584 (!ctxt->disableSAX))
11585 ctxt->sax->endElementNs(ctxt->userData, name,
11586 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011587 if (ctxt->nsNr - nsNr > 0)
11588 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011589#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011590 } else {
11591 if ((ctxt->sax != NULL) &&
11592 (ctxt->sax->endElement != NULL) &&
11593 (!ctxt->disableSAX))
11594 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011595#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011596 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011597 if (ctxt->instate == XML_PARSER_EOF)
11598 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011599 spacePop(ctxt);
11600 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011601 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011602 } else {
11603 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011604 }
Daniel Veillard65686452012-07-19 18:25:01 +080011605 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011606 break;
11607 }
11608 if (RAW == '>') {
11609 NEXT;
11610 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011611 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011612 "Couldn't find end of Start Tag %s\n",
11613 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011614 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011615 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011616 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011617 if (ctxt->sax2)
11618 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011619#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011620 else
11621 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011622#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011623
Daniel Veillarda880b122003-04-21 21:36:41 +000011624 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011625 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011626 break;
11627 }
11628 case XML_PARSER_CONTENT: {
11629 const xmlChar *test;
11630 unsigned int cons;
11631 if ((avail < 2) && (ctxt->inputNr == 1))
11632 goto done;
11633 cur = ctxt->input->cur[0];
11634 next = ctxt->input->cur[1];
11635
11636 test = CUR_PTR;
11637 cons = ctxt->input->consumed;
11638 if ((cur == '<') && (next == '/')) {
11639 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011640 break;
11641 } else if ((cur == '<') && (next == '?')) {
11642 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011643 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11644 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011645 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011646 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011647 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011648 ctxt->instate = XML_PARSER_CONTENT;
11649 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011650 } else if ((cur == '<') && (next != '!')) {
11651 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011652 break;
11653 } else if ((cur == '<') && (next == '!') &&
11654 (ctxt->input->cur[2] == '-') &&
11655 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011656 int term;
11657
11658 if (avail < 4)
11659 goto done;
11660 ctxt->input->cur += 4;
11661 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11662 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011663 if ((!terminate) && (term < 0)) {
11664 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011665 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011666 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011667 xmlParseComment(ctxt);
11668 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011669 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011670 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11671 (ctxt->input->cur[2] == '[') &&
11672 (ctxt->input->cur[3] == 'C') &&
11673 (ctxt->input->cur[4] == 'D') &&
11674 (ctxt->input->cur[5] == 'A') &&
11675 (ctxt->input->cur[6] == 'T') &&
11676 (ctxt->input->cur[7] == 'A') &&
11677 (ctxt->input->cur[8] == '[')) {
11678 SKIP(9);
11679 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011680 break;
11681 } else if ((cur == '<') && (next == '!') &&
11682 (avail < 9)) {
11683 goto done;
11684 } else if (cur == '&') {
11685 if ((!terminate) &&
11686 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11687 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011688 xmlParseReference(ctxt);
11689 } else {
11690 /* TODO Avoid the extra copy, handle directly !!! */
11691 /*
11692 * Goal of the following test is:
11693 * - minimize calls to the SAX 'character' callback
11694 * when they are mergeable
11695 * - handle a problem for isBlank when we only parse
11696 * a sequence of blank chars and the next one is
11697 * not available to check against '<' presence.
11698 * - tries to homogenize the differences in SAX
11699 * callbacks between the push and pull versions
11700 * of the parser.
11701 */
11702 if ((ctxt->inputNr == 1) &&
11703 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11704 if (!terminate) {
11705 if (ctxt->progressive) {
11706 if ((lastlt == NULL) ||
11707 (ctxt->input->cur > lastlt))
11708 goto done;
11709 } else if (xmlParseLookupSequence(ctxt,
11710 '<', 0, 0) < 0) {
11711 goto done;
11712 }
11713 }
11714 }
11715 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011716 xmlParseCharData(ctxt, 0);
11717 }
11718 /*
11719 * Pop-up of finished entities.
11720 */
11721 while ((RAW == 0) && (ctxt->inputNr > 1))
11722 xmlPopInput(ctxt);
11723 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011724 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11725 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011726 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011727 break;
11728 }
11729 break;
11730 }
11731 case XML_PARSER_END_TAG:
11732 if (avail < 2)
11733 goto done;
11734 if (!terminate) {
11735 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011736 /* > can be found unescaped in attribute values */
11737 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011738 goto done;
11739 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11740 goto done;
11741 }
11742 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011743 if (ctxt->sax2) {
11744 xmlParseEndTag2(ctxt,
11745 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11746 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011747 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011748 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011749 }
11750#ifdef LIBXML_SAX1_ENABLED
11751 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011752 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011753#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011754 if (ctxt->instate == XML_PARSER_EOF) {
11755 /* Nothing */
11756 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011757 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011758 } else {
11759 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011760 }
11761 break;
11762 case XML_PARSER_CDATA_SECTION: {
11763 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011764 * The Push mode needs to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011765 * cdataBlock merge back contiguous callbacks.
11766 */
11767 int base;
11768
11769 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11770 if (base < 0) {
11771 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011772 int tmp;
11773
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011774 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011775 XML_PARSER_BIG_BUFFER_SIZE);
11776 if (tmp < 0) {
11777 tmp = -tmp;
11778 ctxt->input->cur += tmp;
11779 goto encoding_error;
11780 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011781 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11782 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011783 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011784 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011785 else if (ctxt->sax->characters != NULL)
11786 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011787 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011788 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011789 if (ctxt->instate == XML_PARSER_EOF)
11790 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011791 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011792 ctxt->checkIndex = 0;
11793 }
11794 goto done;
11795 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011796 int tmp;
11797
11798 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11799 if ((tmp < 0) || (tmp != base)) {
11800 tmp = -tmp;
11801 ctxt->input->cur += tmp;
11802 goto encoding_error;
11803 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011804 if ((ctxt->sax != NULL) && (base == 0) &&
11805 (ctxt->sax->cdataBlock != NULL) &&
11806 (!ctxt->disableSAX)) {
11807 /*
11808 * Special case to provide identical behaviour
11809 * between pull and push parsers on empty CDATA
11810 * sections
11811 */
11812 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11813 (!strncmp((const char *)&ctxt->input->cur[-9],
11814 "<![CDATA[", 9)))
11815 ctxt->sax->cdataBlock(ctxt->userData,
11816 BAD_CAST "", 0);
11817 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011818 (!ctxt->disableSAX)) {
11819 if (ctxt->sax->cdataBlock != NULL)
11820 ctxt->sax->cdataBlock(ctxt->userData,
11821 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011822 else if (ctxt->sax->characters != NULL)
11823 ctxt->sax->characters(ctxt->userData,
11824 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011825 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011826 if (ctxt->instate == XML_PARSER_EOF)
11827 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011828 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011829 ctxt->checkIndex = 0;
11830 ctxt->instate = XML_PARSER_CONTENT;
11831#ifdef DEBUG_PUSH
11832 xmlGenericError(xmlGenericErrorContext,
11833 "PP: entering CONTENT\n");
11834#endif
11835 }
11836 break;
11837 }
Owen Taylor3473f882001-02-23 17:55:21 +000011838 case XML_PARSER_MISC:
11839 SKIP_BLANKS;
11840 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011841 avail = ctxt->input->length -
11842 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011843 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011844 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011845 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011846 if (avail < 2)
11847 goto done;
11848 cur = ctxt->input->cur[0];
11849 next = ctxt->input->cur[1];
11850 if ((cur == '<') && (next == '?')) {
11851 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011852 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11853 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011854 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011855 }
Owen Taylor3473f882001-02-23 17:55:21 +000011856#ifdef DEBUG_PUSH
11857 xmlGenericError(xmlGenericErrorContext,
11858 "PP: Parsing PI\n");
11859#endif
11860 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011861 if (ctxt->instate == XML_PARSER_EOF)
11862 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011863 ctxt->instate = XML_PARSER_MISC;
11864 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011865 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011866 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011867 (ctxt->input->cur[2] == '-') &&
11868 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011869 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011870 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11871 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011872 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011873 }
Owen Taylor3473f882001-02-23 17:55:21 +000011874#ifdef DEBUG_PUSH
11875 xmlGenericError(xmlGenericErrorContext,
11876 "PP: Parsing Comment\n");
11877#endif
11878 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011879 if (ctxt->instate == XML_PARSER_EOF)
11880 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011881 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011882 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011883 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011884 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011885 (ctxt->input->cur[2] == 'D') &&
11886 (ctxt->input->cur[3] == 'O') &&
11887 (ctxt->input->cur[4] == 'C') &&
11888 (ctxt->input->cur[5] == 'T') &&
11889 (ctxt->input->cur[6] == 'Y') &&
11890 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011891 (ctxt->input->cur[8] == 'E')) {
11892 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011893 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11894 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011895 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011896 }
Owen Taylor3473f882001-02-23 17:55:21 +000011897#ifdef DEBUG_PUSH
11898 xmlGenericError(xmlGenericErrorContext,
11899 "PP: Parsing internal subset\n");
11900#endif
11901 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011902 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011903 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011904 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011905 if (ctxt->instate == XML_PARSER_EOF)
11906 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011907 if (RAW == '[') {
11908 ctxt->instate = XML_PARSER_DTD;
11909#ifdef DEBUG_PUSH
11910 xmlGenericError(xmlGenericErrorContext,
11911 "PP: entering DTD\n");
11912#endif
11913 } else {
11914 /*
11915 * Create and update the external subset.
11916 */
11917 ctxt->inSubset = 2;
11918 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11919 (ctxt->sax->externalSubset != NULL))
11920 ctxt->sax->externalSubset(ctxt->userData,
11921 ctxt->intSubName, ctxt->extSubSystem,
11922 ctxt->extSubURI);
11923 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011924 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011925 ctxt->instate = XML_PARSER_PROLOG;
11926#ifdef DEBUG_PUSH
11927 xmlGenericError(xmlGenericErrorContext,
11928 "PP: entering PROLOG\n");
11929#endif
11930 }
11931 } else if ((cur == '<') && (next == '!') &&
11932 (avail < 9)) {
11933 goto done;
11934 } else {
11935 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011936 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011937 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011938#ifdef DEBUG_PUSH
11939 xmlGenericError(xmlGenericErrorContext,
11940 "PP: entering START_TAG\n");
11941#endif
11942 }
11943 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011944 case XML_PARSER_PROLOG:
11945 SKIP_BLANKS;
11946 if (ctxt->input->buf == NULL)
11947 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11948 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011949 avail = xmlBufUse(ctxt->input->buf->buffer) -
11950 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011951 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011952 goto done;
11953 cur = ctxt->input->cur[0];
11954 next = ctxt->input->cur[1];
11955 if ((cur == '<') && (next == '?')) {
11956 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011957 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11958 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011959 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011960 }
Owen Taylor3473f882001-02-23 17:55:21 +000011961#ifdef DEBUG_PUSH
11962 xmlGenericError(xmlGenericErrorContext,
11963 "PP: Parsing PI\n");
11964#endif
11965 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011966 if (ctxt->instate == XML_PARSER_EOF)
11967 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011968 ctxt->instate = XML_PARSER_PROLOG;
11969 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011970 } else if ((cur == '<') && (next == '!') &&
11971 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11972 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011973 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11974 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011975 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011976 }
Owen Taylor3473f882001-02-23 17:55:21 +000011977#ifdef DEBUG_PUSH
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: Parsing Comment\n");
11980#endif
11981 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011982 if (ctxt->instate == XML_PARSER_EOF)
11983 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011984 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011985 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011986 } else if ((cur == '<') && (next == '!') &&
11987 (avail < 4)) {
11988 goto done;
11989 } else {
11990 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011991 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011992 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011993 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011994#ifdef DEBUG_PUSH
11995 xmlGenericError(xmlGenericErrorContext,
11996 "PP: entering START_TAG\n");
11997#endif
11998 }
11999 break;
12000 case XML_PARSER_EPILOG:
12001 SKIP_BLANKS;
12002 if (ctxt->input->buf == NULL)
12003 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
12004 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012005 avail = xmlBufUse(ctxt->input->buf->buffer) -
12006 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000012007 if (avail < 2)
12008 goto done;
12009 cur = ctxt->input->cur[0];
12010 next = ctxt->input->cur[1];
12011 if ((cur == '<') && (next == '?')) {
12012 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080012013 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
12014 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000012015 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080012016 }
Owen Taylor3473f882001-02-23 17:55:21 +000012017#ifdef DEBUG_PUSH
12018 xmlGenericError(xmlGenericErrorContext,
12019 "PP: Parsing PI\n");
12020#endif
12021 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012022 if (ctxt->instate == XML_PARSER_EOF)
12023 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012024 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080012025 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012026 } else if ((cur == '<') && (next == '!') &&
12027 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12028 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080012029 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12030 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012031 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080012032 }
Owen Taylor3473f882001-02-23 17:55:21 +000012033#ifdef DEBUG_PUSH
12034 xmlGenericError(xmlGenericErrorContext,
12035 "PP: Parsing Comment\n");
12036#endif
12037 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012038 if (ctxt->instate == XML_PARSER_EOF)
12039 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012040 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080012041 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012042 } else if ((cur == '<') && (next == '!') &&
12043 (avail < 4)) {
12044 goto done;
12045 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012046 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080012047 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012048#ifdef DEBUG_PUSH
12049 xmlGenericError(xmlGenericErrorContext,
12050 "PP: entering EOF\n");
12051#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012052 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012053 ctxt->sax->endDocument(ctxt->userData);
12054 goto done;
12055 }
12056 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012057 case XML_PARSER_DTD: {
12058 /*
12059 * Sorry but progressive parsing of the internal subset
12060 * is not expected to be supported. We first check that
12061 * the full content of the internal subset is available and
12062 * the parsing is launched only at that point.
12063 * Internal subset ends up with "']' S? '>'" in an unescaped
12064 * section and not in a ']]>' sequence which are conditional
12065 * sections (whoever argued to keep that crap in XML deserve
12066 * a place in hell !).
12067 */
12068 int base, i;
12069 xmlChar *buf;
12070 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012071 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000012072
12073 base = ctxt->input->cur - ctxt->input->base;
12074 if (base < 0) return(0);
12075 if (ctxt->checkIndex > base)
12076 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012077 buf = xmlBufContent(ctxt->input->buf->buffer);
12078 use = xmlBufUse(ctxt->input->buf->buffer);
12079 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000012080 if (quote != 0) {
12081 if (buf[base] == quote)
12082 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012083 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000012084 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012085 if ((quote == 0) && (buf[base] == '<')) {
12086 int found = 0;
12087 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012088 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000012089 (buf[base + 1] == '!') &&
12090 (buf[base + 2] == '-') &&
12091 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012092 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000012093 if ((buf[base] == '-') &&
12094 (buf[base + 1] == '-') &&
12095 (buf[base + 2] == '>')) {
12096 found = 1;
12097 base += 2;
12098 break;
12099 }
12100 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012101 if (!found) {
12102#if 0
12103 fprintf(stderr, "unfinished comment\n");
12104#endif
12105 break; /* for */
12106 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012107 continue;
12108 }
12109 }
Owen Taylor3473f882001-02-23 17:55:21 +000012110 if (buf[base] == '"') {
12111 quote = '"';
12112 continue;
12113 }
12114 if (buf[base] == '\'') {
12115 quote = '\'';
12116 continue;
12117 }
12118 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012119#if 0
12120 fprintf(stderr, "%c%c%c%c: ", buf[base],
12121 buf[base + 1], buf[base + 2], buf[base + 3]);
12122#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012123 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012124 break;
12125 if (buf[base + 1] == ']') {
12126 /* conditional crap, skip both ']' ! */
12127 base++;
12128 continue;
12129 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012130 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012131 if (buf[base + i] == '>') {
12132#if 0
12133 fprintf(stderr, "found\n");
12134#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012135 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012136 }
12137 if (!IS_BLANK_CH(buf[base + i])) {
12138#if 0
12139 fprintf(stderr, "not found\n");
12140#endif
12141 goto not_end_of_int_subset;
12142 }
Owen Taylor3473f882001-02-23 17:55:21 +000012143 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012144#if 0
12145 fprintf(stderr, "end of stream\n");
12146#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012147 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012148
Owen Taylor3473f882001-02-23 17:55:21 +000012149 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012150not_end_of_int_subset:
12151 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012152 }
12153 /*
12154 * We didn't found the end of the Internal subset
12155 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012156 if (quote == 0)
12157 ctxt->checkIndex = base;
12158 else
12159 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012160#ifdef DEBUG_PUSH
12161 if (next == 0)
12162 xmlGenericError(xmlGenericErrorContext,
12163 "PP: lookup of int subset end filed\n");
12164#endif
12165 goto done;
12166
12167found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012168 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012169 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012170 if (ctxt->instate == XML_PARSER_EOF)
12171 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012172 ctxt->inSubset = 2;
12173 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12174 (ctxt->sax->externalSubset != NULL))
12175 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12176 ctxt->extSubSystem, ctxt->extSubURI);
12177 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012178 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012179 if (ctxt->instate == XML_PARSER_EOF)
12180 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012181 ctxt->instate = XML_PARSER_PROLOG;
12182 ctxt->checkIndex = 0;
12183#ifdef DEBUG_PUSH
12184 xmlGenericError(xmlGenericErrorContext,
12185 "PP: entering PROLOG\n");
12186#endif
12187 break;
12188 }
12189 case XML_PARSER_COMMENT:
12190 xmlGenericError(xmlGenericErrorContext,
12191 "PP: internal error, state == COMMENT\n");
12192 ctxt->instate = XML_PARSER_CONTENT;
12193#ifdef DEBUG_PUSH
12194 xmlGenericError(xmlGenericErrorContext,
12195 "PP: entering CONTENT\n");
12196#endif
12197 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012198 case XML_PARSER_IGNORE:
12199 xmlGenericError(xmlGenericErrorContext,
12200 "PP: internal error, state == IGNORE");
12201 ctxt->instate = XML_PARSER_DTD;
12202#ifdef DEBUG_PUSH
12203 xmlGenericError(xmlGenericErrorContext,
12204 "PP: entering DTD\n");
12205#endif
12206 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012207 case XML_PARSER_PI:
12208 xmlGenericError(xmlGenericErrorContext,
12209 "PP: internal error, state == PI\n");
12210 ctxt->instate = XML_PARSER_CONTENT;
12211#ifdef DEBUG_PUSH
12212 xmlGenericError(xmlGenericErrorContext,
12213 "PP: entering CONTENT\n");
12214#endif
12215 break;
12216 case XML_PARSER_ENTITY_DECL:
12217 xmlGenericError(xmlGenericErrorContext,
12218 "PP: internal error, state == ENTITY_DECL\n");
12219 ctxt->instate = XML_PARSER_DTD;
12220#ifdef DEBUG_PUSH
12221 xmlGenericError(xmlGenericErrorContext,
12222 "PP: entering DTD\n");
12223#endif
12224 break;
12225 case XML_PARSER_ENTITY_VALUE:
12226 xmlGenericError(xmlGenericErrorContext,
12227 "PP: internal error, state == ENTITY_VALUE\n");
12228 ctxt->instate = XML_PARSER_CONTENT;
12229#ifdef DEBUG_PUSH
12230 xmlGenericError(xmlGenericErrorContext,
12231 "PP: entering DTD\n");
12232#endif
12233 break;
12234 case XML_PARSER_ATTRIBUTE_VALUE:
12235 xmlGenericError(xmlGenericErrorContext,
12236 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12237 ctxt->instate = XML_PARSER_START_TAG;
12238#ifdef DEBUG_PUSH
12239 xmlGenericError(xmlGenericErrorContext,
12240 "PP: entering START_TAG\n");
12241#endif
12242 break;
12243 case XML_PARSER_SYSTEM_LITERAL:
12244 xmlGenericError(xmlGenericErrorContext,
12245 "PP: internal error, state == SYSTEM_LITERAL\n");
12246 ctxt->instate = XML_PARSER_START_TAG;
12247#ifdef DEBUG_PUSH
12248 xmlGenericError(xmlGenericErrorContext,
12249 "PP: entering START_TAG\n");
12250#endif
12251 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012252 case XML_PARSER_PUBLIC_LITERAL:
12253 xmlGenericError(xmlGenericErrorContext,
12254 "PP: internal error, state == PUBLIC_LITERAL\n");
12255 ctxt->instate = XML_PARSER_START_TAG;
12256#ifdef DEBUG_PUSH
12257 xmlGenericError(xmlGenericErrorContext,
12258 "PP: entering START_TAG\n");
12259#endif
12260 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012261 }
12262 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012263done:
Owen Taylor3473f882001-02-23 17:55:21 +000012264#ifdef DEBUG_PUSH
12265 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12266#endif
12267 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012268encoding_error:
12269 {
12270 char buffer[150];
12271
12272 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12273 ctxt->input->cur[0], ctxt->input->cur[1],
12274 ctxt->input->cur[2], ctxt->input->cur[3]);
12275 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12276 "Input is not proper UTF-8, indicate encoding !\n%s",
12277 BAD_CAST buffer, NULL);
12278 }
12279 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012280}
12281
12282/**
Daniel Veillard65686452012-07-19 18:25:01 +080012283 * xmlParseCheckTransition:
12284 * @ctxt: an XML parser context
12285 * @chunk: a char array
12286 * @size: the size in byte of the chunk
12287 *
12288 * Check depending on the current parser state if the chunk given must be
12289 * processed immediately or one need more data to advance on parsing.
12290 *
12291 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12292 */
12293static int
12294xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12295 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12296 return(-1);
12297 if (ctxt->instate == XML_PARSER_START_TAG) {
12298 if (memchr(chunk, '>', size) != NULL)
12299 return(1);
12300 return(0);
12301 }
12302 if (ctxt->progressive == XML_PARSER_COMMENT) {
12303 if (memchr(chunk, '>', size) != NULL)
12304 return(1);
12305 return(0);
12306 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012307 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12308 if (memchr(chunk, '>', size) != NULL)
12309 return(1);
12310 return(0);
12311 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012312 if (ctxt->progressive == XML_PARSER_PI) {
12313 if (memchr(chunk, '>', size) != NULL)
12314 return(1);
12315 return(0);
12316 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012317 if (ctxt->instate == XML_PARSER_END_TAG) {
12318 if (memchr(chunk, '>', size) != NULL)
12319 return(1);
12320 return(0);
12321 }
12322 if ((ctxt->progressive == XML_PARSER_DTD) ||
12323 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012324 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012325 return(1);
12326 return(0);
12327 }
Daniel Veillard65686452012-07-19 18:25:01 +080012328 return(1);
12329}
12330
12331/**
Owen Taylor3473f882001-02-23 17:55:21 +000012332 * xmlParseChunk:
12333 * @ctxt: an XML parser context
12334 * @chunk: an char array
12335 * @size: the size in byte of the chunk
12336 * @terminate: last chunk indicator
12337 *
12338 * Parse a Chunk of memory
12339 *
12340 * Returns zero if no error, the xmlParserErrors otherwise.
12341 */
12342int
12343xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12344 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012345 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012346 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012347 size_t old_avail = 0;
12348 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012349
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012350 if (ctxt == NULL)
12351 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012352 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012353 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012354 if (ctxt->instate == XML_PARSER_EOF)
12355 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012356 if (ctxt->instate == XML_PARSER_START)
12357 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012358 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12359 (chunk[size - 1] == '\r')) {
12360 end_in_lf = 1;
12361 size--;
12362 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012363
12364xmldecl_done:
12365
Owen Taylor3473f882001-02-23 17:55:21 +000012366 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12367 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012368 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12369 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012370 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012371
Daniel Veillard65686452012-07-19 18:25:01 +080012372 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012373 /*
12374 * Specific handling if we autodetected an encoding, we should not
12375 * push more than the first line ... which depend on the encoding
12376 * And only push the rest once the final encoding was detected
12377 */
12378 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12379 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012380 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012381
12382 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12383 BAD_CAST "UTF-16")) ||
12384 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12385 BAD_CAST "UTF16")))
12386 len = 90;
12387 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12388 BAD_CAST "UCS-4")) ||
12389 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12390 BAD_CAST "UCS4")))
12391 len = 180;
12392
12393 if (ctxt->input->buf->rawconsumed < len)
12394 len -= ctxt->input->buf->rawconsumed;
12395
Raul Hudeaba9716a2010-03-15 10:13:29 +010012396 /*
12397 * Change size for reading the initial declaration only
12398 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12399 * will blindly copy extra bytes from memory.
12400 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012401 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012402 remain = size - len;
12403 size = len;
12404 } else {
12405 remain = 0;
12406 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012407 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012408 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012409 if (res < 0) {
12410 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012411 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012412 return (XML_PARSER_EOF);
12413 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012414 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012415#ifdef DEBUG_PUSH
12416 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12417#endif
12418
Owen Taylor3473f882001-02-23 17:55:21 +000012419 } else if (ctxt->instate != XML_PARSER_EOF) {
12420 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12421 xmlParserInputBufferPtr in = ctxt->input->buf;
12422 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12423 (in->raw != NULL)) {
12424 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012425 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12426 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012427
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012428 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012429 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012430 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012431 xmlGenericError(xmlGenericErrorContext,
12432 "xmlParseChunk: encoder error\n");
12433 return(XML_ERR_INVALID_ENCODING);
12434 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012435 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012436 }
12437 }
12438 }
Daniel Veillard65686452012-07-19 18:25:01 +080012439 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012440 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012441 } else {
12442 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12443 avail = xmlBufUse(ctxt->input->buf->buffer);
12444 /*
12445 * Depending on the current state it may not be such
12446 * a good idea to try parsing if there is nothing in the chunk
12447 * which would be worth doing a parser state transition and we
12448 * need to wait for more data
12449 */
12450 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12451 (old_avail == 0) || (avail == 0) ||
12452 (xmlParseCheckTransition(ctxt,
12453 (const char *)&ctxt->input->base[old_avail],
12454 avail - old_avail)))
12455 xmlParseTryOrFinish(ctxt, terminate);
12456 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012457 if (ctxt->instate == XML_PARSER_EOF)
12458 return(ctxt->errNo);
12459
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012460 if ((ctxt->input != NULL) &&
12461 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12462 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12463 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12464 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012465 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012466 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012467 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12468 return(ctxt->errNo);
12469
12470 if (remain != 0) {
12471 chunk += size;
12472 size = remain;
12473 remain = 0;
12474 goto xmldecl_done;
12475 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012476 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12477 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012478 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12479 ctxt->input);
12480 size_t current = ctxt->input->cur - ctxt->input->base;
12481
Daniel Veillarda617e242006-01-09 14:38:44 +000012482 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012483
12484 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12485 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012486 }
Owen Taylor3473f882001-02-23 17:55:21 +000012487 if (terminate) {
12488 /*
12489 * Check for termination
12490 */
Daniel Veillard65686452012-07-19 18:25:01 +080012491 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012492
12493 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012494 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012495 cur_avail = ctxt->input->length -
12496 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012497 else
Daniel Veillard65686452012-07-19 18:25:01 +080012498 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12499 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012500 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012501
Owen Taylor3473f882001-02-23 17:55:21 +000012502 if ((ctxt->instate != XML_PARSER_EOF) &&
12503 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012504 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012505 }
Daniel Veillard65686452012-07-19 18:25:01 +080012506 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012507 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012508 }
Owen Taylor3473f882001-02-23 17:55:21 +000012509 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012510 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012511 ctxt->sax->endDocument(ctxt->userData);
12512 }
12513 ctxt->instate = XML_PARSER_EOF;
12514 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012515 if (ctxt->wellFormed == 0)
12516 return((xmlParserErrors) ctxt->errNo);
12517 else
12518 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012519}
12520
12521/************************************************************************
12522 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012523 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012524 * *
12525 ************************************************************************/
12526
12527/**
Owen Taylor3473f882001-02-23 17:55:21 +000012528 * xmlCreatePushParserCtxt:
12529 * @sax: a SAX handler
12530 * @user_data: The user data returned on SAX callbacks
12531 * @chunk: a pointer to an array of chars
12532 * @size: number of chars in the array
12533 * @filename: an optional file name or URI
12534 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012535 * Create a parser context for using the XML parser in push mode.
12536 * If @buffer and @size are non-NULL, the data is used to detect
12537 * the encoding. The remaining characters will be parsed so they
12538 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012539 * To allow content encoding detection, @size should be >= 4
12540 * The value of @filename is used for fetching external entities
12541 * and error/warning reports.
12542 *
12543 * Returns the new parser context or NULL
12544 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012545
Owen Taylor3473f882001-02-23 17:55:21 +000012546xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012547xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012548 const char *chunk, int size, const char *filename) {
12549 xmlParserCtxtPtr ctxt;
12550 xmlParserInputPtr inputStream;
12551 xmlParserInputBufferPtr buf;
12552 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12553
12554 /*
12555 * plug some encoding conversion routines
12556 */
12557 if ((chunk != NULL) && (size >= 4))
12558 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12559
12560 buf = xmlAllocParserInputBuffer(enc);
12561 if (buf == NULL) return(NULL);
12562
12563 ctxt = xmlNewParserCtxt();
12564 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012565 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012566 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012567 return(NULL);
12568 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012569 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012570 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12571 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012572 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012573 xmlFreeParserInputBuffer(buf);
12574 xmlFreeParserCtxt(ctxt);
12575 return(NULL);
12576 }
Owen Taylor3473f882001-02-23 17:55:21 +000012577 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012578#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012579 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012580#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012581 xmlFree(ctxt->sax);
12582 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12583 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012584 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012585 xmlFreeParserInputBuffer(buf);
12586 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012587 return(NULL);
12588 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012589 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12590 if (sax->initialized == XML_SAX2_MAGIC)
12591 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12592 else
12593 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012594 if (user_data != NULL)
12595 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012596 }
Owen Taylor3473f882001-02-23 17:55:21 +000012597 if (filename == NULL) {
12598 ctxt->directory = NULL;
12599 } else {
12600 ctxt->directory = xmlParserGetDirectory(filename);
12601 }
12602
12603 inputStream = xmlNewInputStream(ctxt);
12604 if (inputStream == NULL) {
12605 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012606 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012607 return(NULL);
12608 }
12609
12610 if (filename == NULL)
12611 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012612 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012613 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012614 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012615 if (inputStream->filename == NULL) {
12616 xmlFreeParserCtxt(ctxt);
12617 xmlFreeParserInputBuffer(buf);
12618 return(NULL);
12619 }
12620 }
Owen Taylor3473f882001-02-23 17:55:21 +000012621 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012622 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012623 inputPush(ctxt, inputStream);
12624
William M. Brack3a1cd212005-02-11 14:35:54 +000012625 /*
12626 * If the caller didn't provide an initial 'chunk' for determining
12627 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12628 * that it can be automatically determined later
12629 */
12630 if ((size == 0) || (chunk == NULL)) {
12631 ctxt->charset = XML_CHAR_ENCODING_NONE;
12632 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012633 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12634 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012635
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012636 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012637
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012638 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012639#ifdef DEBUG_PUSH
12640 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12641#endif
12642 }
12643
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012644 if (enc != XML_CHAR_ENCODING_NONE) {
12645 xmlSwitchEncoding(ctxt, enc);
12646 }
12647
Owen Taylor3473f882001-02-23 17:55:21 +000012648 return(ctxt);
12649}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012650#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012651
12652/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012653 * xmlHaltParser:
12654 * @ctxt: an XML parser context
12655 *
12656 * Blocks further parser processing don't override error
12657 * for internal use
12658 */
12659static void
12660xmlHaltParser(xmlParserCtxtPtr ctxt) {
12661 if (ctxt == NULL)
12662 return;
12663 ctxt->instate = XML_PARSER_EOF;
12664 ctxt->disableSAX = 1;
12665 if (ctxt->input != NULL) {
12666 /*
12667 * in case there was a specific allocation deallocate before
12668 * overriding base
12669 */
12670 if (ctxt->input->free != NULL) {
12671 ctxt->input->free((xmlChar *) ctxt->input->base);
12672 ctxt->input->free = NULL;
12673 }
12674 ctxt->input->cur = BAD_CAST"";
12675 ctxt->input->base = ctxt->input->cur;
12676 }
12677}
12678
12679/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012680 * xmlStopParser:
12681 * @ctxt: an XML parser context
12682 *
12683 * Blocks further parser processing
12684 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012685void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012686xmlStopParser(xmlParserCtxtPtr ctxt) {
12687 if (ctxt == NULL)
12688 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012689 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012690 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012691}
12692
12693/**
Owen Taylor3473f882001-02-23 17:55:21 +000012694 * xmlCreateIOParserCtxt:
12695 * @sax: a SAX handler
12696 * @user_data: The user data returned on SAX callbacks
12697 * @ioread: an I/O read function
12698 * @ioclose: an I/O close function
12699 * @ioctx: an I/O handler
12700 * @enc: the charset encoding if known
12701 *
12702 * Create a parser context for using the XML parser with an existing
12703 * I/O stream
12704 *
12705 * Returns the new parser context or NULL
12706 */
12707xmlParserCtxtPtr
12708xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12709 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12710 void *ioctx, xmlCharEncoding enc) {
12711 xmlParserCtxtPtr ctxt;
12712 xmlParserInputPtr inputStream;
12713 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012714
Daniel Veillard42595322004-11-08 10:52:06 +000012715 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012716
12717 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012718 if (buf == NULL) {
12719 if (ioclose != NULL)
12720 ioclose(ioctx);
12721 return (NULL);
12722 }
Owen Taylor3473f882001-02-23 17:55:21 +000012723
12724 ctxt = xmlNewParserCtxt();
12725 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012726 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012727 return(NULL);
12728 }
12729 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012730#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012731 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012732#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012733 xmlFree(ctxt->sax);
12734 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12735 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012736 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012737 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012738 return(NULL);
12739 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012740 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12741 if (sax->initialized == XML_SAX2_MAGIC)
12742 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12743 else
12744 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012745 if (user_data != NULL)
12746 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012747 }
Owen Taylor3473f882001-02-23 17:55:21 +000012748
12749 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12750 if (inputStream == NULL) {
12751 xmlFreeParserCtxt(ctxt);
12752 return(NULL);
12753 }
12754 inputPush(ctxt, inputStream);
12755
12756 return(ctxt);
12757}
12758
Daniel Veillard4432df22003-09-28 18:58:27 +000012759#ifdef LIBXML_VALID_ENABLED
/************************************************************************
 *									*
 *		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
12765
12766/**
12767 * xmlIOParseDTD:
12768 * @sax: the SAX handler block or NULL
12769 * @input: an Input Buffer
12770 * @enc: the charset encoding if known
12771 *
12772 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012773 *
Owen Taylor3473f882001-02-23 17:55:21 +000012774 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012775 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012776 */
12777
12778xmlDtdPtr
12779xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12780 xmlCharEncoding enc) {
12781 xmlDtdPtr ret = NULL;
12782 xmlParserCtxtPtr ctxt;
12783 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012784 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012785
12786 if (input == NULL)
12787 return(NULL);
12788
12789 ctxt = xmlNewParserCtxt();
12790 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012791 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012792 return(NULL);
12793 }
12794
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012795 /* We are loading a DTD */
12796 ctxt->options |= XML_PARSE_DTDLOAD;
12797
Owen Taylor3473f882001-02-23 17:55:21 +000012798 /*
12799 * Set-up the SAX context
12800 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012801 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012802 if (ctxt->sax != NULL)
12803 xmlFree(ctxt->sax);
12804 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012805 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012806 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012807 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012808
12809 /*
12810 * generate a parser input from the I/O handler
12811 */
12812
Daniel Veillard43caefb2003-12-07 19:32:22 +000012813 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012814 if (pinput == NULL) {
12815 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012816 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012817 xmlFreeParserCtxt(ctxt);
12818 return(NULL);
12819 }
12820
12821 /*
12822 * plug some encoding conversion routines here.
12823 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012824 if (xmlPushInput(ctxt, pinput) < 0) {
12825 if (sax != NULL) ctxt->sax = NULL;
12826 xmlFreeParserCtxt(ctxt);
12827 return(NULL);
12828 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012829 if (enc != XML_CHAR_ENCODING_NONE) {
12830 xmlSwitchEncoding(ctxt, enc);
12831 }
Owen Taylor3473f882001-02-23 17:55:21 +000012832
12833 pinput->filename = NULL;
12834 pinput->line = 1;
12835 pinput->col = 1;
12836 pinput->base = ctxt->input->cur;
12837 pinput->cur = ctxt->input->cur;
12838 pinput->free = NULL;
12839
12840 /*
12841 * let's parse that entity knowing it's an external subset.
12842 */
12843 ctxt->inSubset = 2;
12844 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012845 if (ctxt->myDoc == NULL) {
12846 xmlErrMemory(ctxt, "New Doc failed");
12847 return(NULL);
12848 }
12849 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012850 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12851 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012852
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012853 if ((enc == XML_CHAR_ENCODING_NONE) &&
12854 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012855 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012856 * Get the 4 first bytes and decode the charset
12857 * if enc != XML_CHAR_ENCODING_NONE
12858 * plug some encoding conversion routines.
12859 */
12860 start[0] = RAW;
12861 start[1] = NXT(1);
12862 start[2] = NXT(2);
12863 start[3] = NXT(3);
12864 enc = xmlDetectCharEncoding(start, 4);
12865 if (enc != XML_CHAR_ENCODING_NONE) {
12866 xmlSwitchEncoding(ctxt, enc);
12867 }
12868 }
12869
Owen Taylor3473f882001-02-23 17:55:21 +000012870 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12871
12872 if (ctxt->myDoc != NULL) {
12873 if (ctxt->wellFormed) {
12874 ret = ctxt->myDoc->extSubset;
12875 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012876 if (ret != NULL) {
12877 xmlNodePtr tmp;
12878
12879 ret->doc = NULL;
12880 tmp = ret->children;
12881 while (tmp != NULL) {
12882 tmp->doc = NULL;
12883 tmp = tmp->next;
12884 }
12885 }
Owen Taylor3473f882001-02-23 17:55:21 +000012886 } else {
12887 ret = NULL;
12888 }
12889 xmlFreeDoc(ctxt->myDoc);
12890 ctxt->myDoc = NULL;
12891 }
12892 if (sax != NULL) ctxt->sax = NULL;
12893 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012894
Owen Taylor3473f882001-02-23 17:55:21 +000012895 return(ret);
12896}
12897
12898/**
12899 * xmlSAXParseDTD:
12900 * @sax: the SAX handler block
12901 * @ExternalID: a NAME* containing the External ID of the DTD
12902 * @SystemID: a NAME* containing the URL to the DTD
12903 *
12904 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012905 *
Owen Taylor3473f882001-02-23 17:55:21 +000012906 * Returns the resulting xmlDtdPtr or NULL in case of error.
12907 */
12908
12909xmlDtdPtr
12910xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12911 const xmlChar *SystemID) {
12912 xmlDtdPtr ret = NULL;
12913 xmlParserCtxtPtr ctxt;
12914 xmlParserInputPtr input = NULL;
12915 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012916 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012917
12918 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12919
12920 ctxt = xmlNewParserCtxt();
12921 if (ctxt == NULL) {
12922 return(NULL);
12923 }
12924
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012925 /* We are loading a DTD */
12926 ctxt->options |= XML_PARSE_DTDLOAD;
12927
Owen Taylor3473f882001-02-23 17:55:21 +000012928 /*
12929 * Set-up the SAX context
12930 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012931 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012932 if (ctxt->sax != NULL)
12933 xmlFree(ctxt->sax);
12934 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012935 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012936 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012937
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012938 /*
12939 * Canonicalise the system ID
12940 */
12941 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012942 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012943 xmlFreeParserCtxt(ctxt);
12944 return(NULL);
12945 }
Owen Taylor3473f882001-02-23 17:55:21 +000012946
12947 /*
12948 * Ask the Entity resolver to load the damn thing
12949 */
12950
12951 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012952 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12953 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012954 if (input == NULL) {
12955 if (sax != NULL) ctxt->sax = NULL;
12956 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012957 if (systemIdCanonic != NULL)
12958 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012959 return(NULL);
12960 }
12961
12962 /*
12963 * plug some encoding conversion routines here.
12964 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012965 if (xmlPushInput(ctxt, input) < 0) {
12966 if (sax != NULL) ctxt->sax = NULL;
12967 xmlFreeParserCtxt(ctxt);
12968 if (systemIdCanonic != NULL)
12969 xmlFree(systemIdCanonic);
12970 return(NULL);
12971 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012972 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12973 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12974 xmlSwitchEncoding(ctxt, enc);
12975 }
Owen Taylor3473f882001-02-23 17:55:21 +000012976
12977 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012978 input->filename = (char *) systemIdCanonic;
12979 else
12980 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012981 input->line = 1;
12982 input->col = 1;
12983 input->base = ctxt->input->cur;
12984 input->cur = ctxt->input->cur;
12985 input->free = NULL;
12986
12987 /*
12988 * let's parse that entity knowing it's an external subset.
12989 */
12990 ctxt->inSubset = 2;
12991 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012992 if (ctxt->myDoc == NULL) {
12993 xmlErrMemory(ctxt, "New Doc failed");
12994 if (sax != NULL) ctxt->sax = NULL;
12995 xmlFreeParserCtxt(ctxt);
12996 return(NULL);
12997 }
12998 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012999 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
13000 ExternalID, SystemID);
13001 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
13002
13003 if (ctxt->myDoc != NULL) {
13004 if (ctxt->wellFormed) {
13005 ret = ctxt->myDoc->extSubset;
13006 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000013007 if (ret != NULL) {
13008 xmlNodePtr tmp;
13009
13010 ret->doc = NULL;
13011 tmp = ret->children;
13012 while (tmp != NULL) {
13013 tmp->doc = NULL;
13014 tmp = tmp->next;
13015 }
13016 }
Owen Taylor3473f882001-02-23 17:55:21 +000013017 } else {
13018 ret = NULL;
13019 }
13020 xmlFreeDoc(ctxt->myDoc);
13021 ctxt->myDoc = NULL;
13022 }
13023 if (sax != NULL) ctxt->sax = NULL;
13024 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013025
Owen Taylor3473f882001-02-23 17:55:21 +000013026 return(ret);
13027}
13028
Daniel Veillard4432df22003-09-28 18:58:27 +000013029
Owen Taylor3473f882001-02-23 17:55:21 +000013030/**
13031 * xmlParseDTD:
13032 * @ExternalID: a NAME* containing the External ID of the DTD
13033 * @SystemID: a NAME* containing the URL to the DTD
13034 *
13035 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000013036 *
Owen Taylor3473f882001-02-23 17:55:21 +000013037 * Returns the resulting xmlDtdPtr or NULL in case of error.
13038 */
13039
13040xmlDtdPtr
13041xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13042 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13043}
Daniel Veillard4432df22003-09-28 18:58:27 +000013044#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013045
/************************************************************************
 *									*
 *		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
13051
13052/**
Owen Taylor3473f882001-02-23 17:55:21 +000013053 * xmlParseCtxtExternalEntity:
13054 * @ctx: the existing parsing context
13055 * @URL: the URL for the entity to load
13056 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013057 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013058 *
13059 * Parse an external general entity within an existing parsing context
13060 * An external general parsed entity is well-formed if it matches the
13061 * production labeled extParsedEnt.
13062 *
13063 * [78] extParsedEnt ::= TextDecl? content
13064 *
13065 * Returns 0 if the entity is well formed, -1 in case of args problem and
13066 * the parser error code otherwise
13067 */
13068
13069int
13070xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013071 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000013072 xmlParserCtxtPtr ctxt;
13073 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013074 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013075 xmlSAXHandlerPtr oldsax = NULL;
13076 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013077 xmlChar start[4];
13078 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013079
Daniel Veillardce682bc2004-11-05 17:22:25 +000013080 if (ctx == NULL) return(-1);
13081
Daniel Veillard0161e632008-08-28 15:36:32 +000013082 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13083 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013084 return(XML_ERR_ENTITY_LOOP);
13085 }
13086
Daniel Veillardcda96922001-08-21 10:56:31 +000013087 if (lst != NULL)
13088 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013089 if ((URL == NULL) && (ID == NULL))
13090 return(-1);
13091 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13092 return(-1);
13093
Rob Richards798743a2009-06-19 13:54:25 -040013094 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000013095 if (ctxt == NULL) {
13096 return(-1);
13097 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013098
Owen Taylor3473f882001-02-23 17:55:21 +000013099 oldsax = ctxt->sax;
13100 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013101 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013102 newDoc = xmlNewDoc(BAD_CAST "1.0");
13103 if (newDoc == NULL) {
13104 xmlFreeParserCtxt(ctxt);
13105 return(-1);
13106 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013107 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013108 if (ctx->myDoc->dict) {
13109 newDoc->dict = ctx->myDoc->dict;
13110 xmlDictReference(newDoc->dict);
13111 }
Owen Taylor3473f882001-02-23 17:55:21 +000013112 if (ctx->myDoc != NULL) {
13113 newDoc->intSubset = ctx->myDoc->intSubset;
13114 newDoc->extSubset = ctx->myDoc->extSubset;
13115 }
13116 if (ctx->myDoc->URL != NULL) {
13117 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13118 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013119 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13120 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013121 ctxt->sax = oldsax;
13122 xmlFreeParserCtxt(ctxt);
13123 newDoc->intSubset = NULL;
13124 newDoc->extSubset = NULL;
13125 xmlFreeDoc(newDoc);
13126 return(-1);
13127 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013128 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013129 nodePush(ctxt, newDoc->children);
13130 if (ctx->myDoc == NULL) {
13131 ctxt->myDoc = newDoc;
13132 } else {
13133 ctxt->myDoc = ctx->myDoc;
13134 newDoc->children->doc = ctx->myDoc;
13135 }
13136
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013137 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013138 * Get the 4 first bytes and decode the charset
13139 * if enc != XML_CHAR_ENCODING_NONE
13140 * plug some encoding conversion routines.
13141 */
13142 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013143 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13144 start[0] = RAW;
13145 start[1] = NXT(1);
13146 start[2] = NXT(2);
13147 start[3] = NXT(3);
13148 enc = xmlDetectCharEncoding(start, 4);
13149 if (enc != XML_CHAR_ENCODING_NONE) {
13150 xmlSwitchEncoding(ctxt, enc);
13151 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013152 }
13153
Owen Taylor3473f882001-02-23 17:55:21 +000013154 /*
13155 * Parse a possible text declaration first
13156 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013157 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013158 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013159 /*
13160 * An XML-1.0 document can't reference an entity not XML-1.0
13161 */
13162 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13163 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013164 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013165 "Version mismatch between document and entity\n");
13166 }
Owen Taylor3473f882001-02-23 17:55:21 +000013167 }
13168
13169 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013170 * If the user provided its own SAX callbacks then reuse the
13171 * useData callback field, otherwise the expected setup in a
13172 * DOM builder is to have userData == ctxt
13173 */
13174 if (ctx->userData == ctx)
13175 ctxt->userData = ctxt;
13176 else
13177 ctxt->userData = ctx->userData;
13178
13179 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013180 * Doing validity checking on chunk doesn't make sense
13181 */
13182 ctxt->instate = XML_PARSER_CONTENT;
13183 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013184 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013185 ctxt->loadsubset = ctx->loadsubset;
13186 ctxt->depth = ctx->depth + 1;
13187 ctxt->replaceEntities = ctx->replaceEntities;
13188 if (ctxt->validate) {
13189 ctxt->vctxt.error = ctx->vctxt.error;
13190 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013191 } else {
13192 ctxt->vctxt.error = NULL;
13193 ctxt->vctxt.warning = NULL;
13194 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013195 ctxt->vctxt.nodeTab = NULL;
13196 ctxt->vctxt.nodeNr = 0;
13197 ctxt->vctxt.nodeMax = 0;
13198 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013199 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13200 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013201 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13202 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13203 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013204 ctxt->dictNames = ctx->dictNames;
13205 ctxt->attsDefault = ctx->attsDefault;
13206 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013207 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013208
13209 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013210
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013211 ctx->validate = ctxt->validate;
13212 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013213 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013214 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013215 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013216 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013217 }
13218 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013219 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013220 }
13221
13222 if (!ctxt->wellFormed) {
13223 if (ctxt->errNo == 0)
13224 ret = 1;
13225 else
13226 ret = ctxt->errNo;
13227 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013228 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013229 xmlNodePtr cur;
13230
13231 /*
13232 * Return the newly created nodeset after unlinking it from
13233 * they pseudo parent.
13234 */
13235 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013236 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013237 while (cur != NULL) {
13238 cur->parent = NULL;
13239 cur = cur->next;
13240 }
13241 newDoc->children->children = NULL;
13242 }
13243 ret = 0;
13244 }
13245 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013246 ctxt->dict = NULL;
13247 ctxt->attsDefault = NULL;
13248 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013249 xmlFreeParserCtxt(ctxt);
13250 newDoc->intSubset = NULL;
13251 newDoc->extSubset = NULL;
13252 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013253
Owen Taylor3473f882001-02-23 17:55:21 +000013254 return(ret);
13255}
13256
13257/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013258 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013259 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013260 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013261 * @sax: the SAX handler bloc (possibly NULL)
13262 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13263 * @depth: Used for loop detection, use 0
13264 * @URL: the URL for the entity to load
13265 * @ID: the System ID for the entity to load
13266 * @list: the return value for the set of parsed nodes
13267 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013268 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013269 *
13270 * Returns 0 if the entity is well formed, -1 in case of args problem and
13271 * the parser error code otherwise
13272 */
13273
Daniel Veillard7d515752003-09-26 19:12:37 +000013274static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013275xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13276 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013277 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013278 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013279 xmlParserCtxtPtr ctxt;
13280 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013281 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013282 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013283 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013284 xmlChar start[4];
13285 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013286
Daniel Veillard0161e632008-08-28 15:36:32 +000013287 if (((depth > 40) &&
13288 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13289 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013290 return(XML_ERR_ENTITY_LOOP);
13291 }
13292
Owen Taylor3473f882001-02-23 17:55:21 +000013293 if (list != NULL)
13294 *list = NULL;
13295 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013296 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013297 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013298 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013299
13300
Rob Richards9c0aa472009-03-26 18:10:19 +000013301 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013302 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013303 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013304 if (oldctxt != NULL) {
13305 ctxt->_private = oldctxt->_private;
13306 ctxt->loadsubset = oldctxt->loadsubset;
13307 ctxt->validate = oldctxt->validate;
13308 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013309 ctxt->record_info = oldctxt->record_info;
13310 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13311 ctxt->node_seq.length = oldctxt->node_seq.length;
13312 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013313 } else {
13314 /*
13315 * Doing validity checking on chunk without context
13316 * doesn't make sense
13317 */
13318 ctxt->_private = NULL;
13319 ctxt->validate = 0;
13320 ctxt->external = 2;
13321 ctxt->loadsubset = 0;
13322 }
Owen Taylor3473f882001-02-23 17:55:21 +000013323 if (sax != NULL) {
13324 oldsax = ctxt->sax;
13325 ctxt->sax = sax;
13326 if (user_data != NULL)
13327 ctxt->userData = user_data;
13328 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013329 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013330 newDoc = xmlNewDoc(BAD_CAST "1.0");
13331 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013332 ctxt->node_seq.maximum = 0;
13333 ctxt->node_seq.length = 0;
13334 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013335 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013336 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013337 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013338 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013339 newDoc->intSubset = doc->intSubset;
13340 newDoc->extSubset = doc->extSubset;
13341 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013342 xmlDictReference(newDoc->dict);
13343
Owen Taylor3473f882001-02-23 17:55:21 +000013344 if (doc->URL != NULL) {
13345 newDoc->URL = xmlStrdup(doc->URL);
13346 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013347 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13348 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013349 if (sax != NULL)
13350 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013351 ctxt->node_seq.maximum = 0;
13352 ctxt->node_seq.length = 0;
13353 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013354 xmlFreeParserCtxt(ctxt);
13355 newDoc->intSubset = NULL;
13356 newDoc->extSubset = NULL;
13357 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013358 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013359 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013360 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013361 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013362 ctxt->myDoc = doc;
13363 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013364
Daniel Veillard0161e632008-08-28 15:36:32 +000013365 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013366 * Get the 4 first bytes and decode the charset
13367 * if enc != XML_CHAR_ENCODING_NONE
13368 * plug some encoding conversion routines.
13369 */
13370 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013371 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13372 start[0] = RAW;
13373 start[1] = NXT(1);
13374 start[2] = NXT(2);
13375 start[3] = NXT(3);
13376 enc = xmlDetectCharEncoding(start, 4);
13377 if (enc != XML_CHAR_ENCODING_NONE) {
13378 xmlSwitchEncoding(ctxt, enc);
13379 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013380 }
13381
Owen Taylor3473f882001-02-23 17:55:21 +000013382 /*
13383 * Parse a possible text declaration first
13384 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013385 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013386 xmlParseTextDecl(ctxt);
13387 }
13388
Owen Taylor3473f882001-02-23 17:55:21 +000013389 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013390 ctxt->depth = depth;
13391
13392 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013393
Daniel Veillard561b7f82002-03-20 21:55:57 +000013394 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013395 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013396 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013397 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013398 }
13399 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013400 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013401 }
13402
13403 if (!ctxt->wellFormed) {
13404 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013405 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013406 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013407 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013408 } else {
13409 if (list != NULL) {
13410 xmlNodePtr cur;
13411
13412 /*
13413 * Return the newly created nodeset after unlinking it from
13414 * they pseudo parent.
13415 */
13416 cur = newDoc->children->children;
13417 *list = cur;
13418 while (cur != NULL) {
13419 cur->parent = NULL;
13420 cur = cur->next;
13421 }
13422 newDoc->children->children = NULL;
13423 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013424 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013425 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013426
13427 /*
13428 * Record in the parent context the number of entities replacement
13429 * done when parsing that reference.
13430 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013431 if (oldctxt != NULL)
13432 oldctxt->nbentities += ctxt->nbentities;
13433
Daniel Veillard0161e632008-08-28 15:36:32 +000013434 /*
13435 * Also record the size of the entity parsed
13436 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013437 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013438 oldctxt->sizeentities += ctxt->input->consumed;
13439 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13440 }
13441 /*
13442 * And record the last error if any
13443 */
13444 if (ctxt->lastError.code != XML_ERR_OK)
13445 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13446
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013447 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013448 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013449 if (oldctxt != NULL) {
13450 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13451 oldctxt->node_seq.length = ctxt->node_seq.length;
13452 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13453 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013454 ctxt->node_seq.maximum = 0;
13455 ctxt->node_seq.length = 0;
13456 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013457 xmlFreeParserCtxt(ctxt);
13458 newDoc->intSubset = NULL;
13459 newDoc->extSubset = NULL;
13460 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013461
Owen Taylor3473f882001-02-23 17:55:21 +000013462 return(ret);
13463}
13464
Daniel Veillard81273902003-09-30 00:43:48 +000013465#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013466/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013467 * xmlParseExternalEntity:
13468 * @doc: the document the chunk pertains to
13469 * @sax: the SAX handler bloc (possibly NULL)
13470 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13471 * @depth: Used for loop detection, use 0
13472 * @URL: the URL for the entity to load
13473 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013474 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013475 *
13476 * Parse an external general entity
13477 * An external general parsed entity is well-formed if it matches the
13478 * production labeled extParsedEnt.
13479 *
13480 * [78] extParsedEnt ::= TextDecl? content
13481 *
13482 * Returns 0 if the entity is well formed, -1 in case of args problem and
13483 * the parser error code otherwise
13484 */
13485
13486int
13487xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013488 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013489 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013490 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013491}
13492
13493/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013494 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013495 * @doc: the document the chunk pertains to
13496 * @sax: the SAX handler bloc (possibly NULL)
13497 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13498 * @depth: Used for loop detection, use 0
13499 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013500 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013501 *
13502 * Parse a well-balanced chunk of an XML document
13503 * called by the parser
13504 * The allowed sequence for the Well Balanced Chunk is the one defined by
13505 * the content production in the XML grammar:
13506 *
13507 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13508 *
13509 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13510 * the parser error code otherwise
13511 */
13512
13513int
13514xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013515 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013516 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13517 depth, string, lst, 0 );
13518}
Daniel Veillard81273902003-09-30 00:43:48 +000013519#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013520
13521/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013522 * xmlParseBalancedChunkMemoryInternal:
13523 * @oldctxt: the existing parsing context
13524 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13525 * @user_data: the user data field for the parser context
13526 * @lst: the return value for the set of parsed nodes
13527 *
13528 *
13529 * Parse a well-balanced chunk of an XML document
13530 * called by the parser
13531 * The allowed sequence for the Well Balanced Chunk is the one defined by
13532 * the content production in the XML grammar:
13533 *
13534 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13535 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013536 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13537 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013538 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013539 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013540 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013541 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013542static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013543xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13544 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13545 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013546 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013547 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013548 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013549 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013550 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013551 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013552 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013553#ifdef SAX2
13554 int i;
13555#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013556
Daniel Veillard0161e632008-08-28 15:36:32 +000013557 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13558 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013559 return(XML_ERR_ENTITY_LOOP);
13560 }
13561
13562
13563 if (lst != NULL)
13564 *lst = NULL;
13565 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013566 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013567
13568 size = xmlStrlen(string);
13569
13570 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013571 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013572 if (user_data != NULL)
13573 ctxt->userData = user_data;
13574 else
13575 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013576 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13577 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013578 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13579 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13580 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013581
Daniel Veillard74eaec12009-08-26 15:57:20 +020013582#ifdef SAX2
13583 /* propagate namespaces down the entity */
13584 for (i = 0;i < oldctxt->nsNr;i += 2) {
13585 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13586 }
13587#endif
13588
Daniel Veillard328f48c2002-11-15 15:24:34 +000013589 oldsax = ctxt->sax;
13590 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013591 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013592 ctxt->replaceEntities = oldctxt->replaceEntities;
13593 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013594
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013595 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013596 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013597 newDoc = xmlNewDoc(BAD_CAST "1.0");
13598 if (newDoc == NULL) {
13599 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013600 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013601 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013602 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013603 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013604 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013605 newDoc->dict = ctxt->dict;
13606 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013607 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013608 } else {
13609 ctxt->myDoc = oldctxt->myDoc;
13610 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013611 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013612 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013613 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13614 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013615 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013616 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013617 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013618 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013619 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013620 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013621 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013622 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013623 ctxt->myDoc->children = NULL;
13624 ctxt->myDoc->last = NULL;
13625 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013626 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013627 ctxt->instate = XML_PARSER_CONTENT;
13628 ctxt->depth = oldctxt->depth + 1;
13629
Daniel Veillard328f48c2002-11-15 15:24:34 +000013630 ctxt->validate = 0;
13631 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013632 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13633 /*
13634 * ID/IDREF registration will be done in xmlValidateElement below
13635 */
13636 ctxt->loadsubset |= XML_SKIP_IDS;
13637 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013638 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013639 ctxt->attsDefault = oldctxt->attsDefault;
13640 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013641
Daniel Veillard68e9e742002-11-16 15:35:11 +000013642 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013643 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013644 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013645 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013646 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013647 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013648 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013649 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013650 }
13651
13652 if (!ctxt->wellFormed) {
13653 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013654 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013655 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013656 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013657 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013658 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013659 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013660
William M. Brack7b9154b2003-09-27 19:23:50 +000013661 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013662 xmlNodePtr cur;
13663
13664 /*
13665 * Return the newly created nodeset after unlinking it from
13666 * they pseudo parent.
13667 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013668 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013669 *lst = cur;
13670 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013671#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013672 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13673 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13674 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013675 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13676 oldctxt->myDoc, cur);
13677 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013678#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013679 cur->parent = NULL;
13680 cur = cur->next;
13681 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013682 ctxt->myDoc->children->children = NULL;
13683 }
13684 if (ctxt->myDoc != NULL) {
13685 xmlFreeNode(ctxt->myDoc->children);
13686 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013687 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013688 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013689
13690 /*
13691 * Record in the parent context the number of entities replacement
13692 * done when parsing that reference.
13693 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013694 if (oldctxt != NULL)
13695 oldctxt->nbentities += ctxt->nbentities;
13696
Daniel Veillard0161e632008-08-28 15:36:32 +000013697 /*
13698 * Also record the last error if any
13699 */
13700 if (ctxt->lastError.code != XML_ERR_OK)
13701 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13702
Daniel Veillard328f48c2002-11-15 15:24:34 +000013703 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013704 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013705 ctxt->attsDefault = NULL;
13706 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013707 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013708 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013709 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013710 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013711
Daniel Veillard328f48c2002-11-15 15:24:34 +000013712 return(ret);
13713}
13714
Daniel Veillard29b17482004-08-16 00:39:03 +000013715/**
13716 * xmlParseInNodeContext:
13717 * @node: the context node
13718 * @data: the input string
13719 * @datalen: the input string length in bytes
13720 * @options: a combination of xmlParserOption
13721 * @lst: the return value for the set of parsed nodes
13722 *
13723 * Parse a well-balanced chunk of an XML document
13724 * within the context (DTD, namespaces, etc ...) of the given node.
13725 *
13726 * The allowed sequence for the data is a Well Balanced Chunk defined by
13727 * the content production in the XML grammar:
13728 *
13729 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13730 *
13731 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13732 * error code otherwise
13733 */
13734xmlParserErrors
13735xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13736 int options, xmlNodePtr *lst) {
13737#ifdef SAX2
13738 xmlParserCtxtPtr ctxt;
13739 xmlDocPtr doc = NULL;
13740 xmlNodePtr fake, cur;
13741 int nsnr = 0;
13742
13743 xmlParserErrors ret = XML_ERR_OK;
13744
13745 /*
13746 * check all input parameters, grab the document
13747 */
13748 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13749 return(XML_ERR_INTERNAL_ERROR);
13750 switch (node->type) {
13751 case XML_ELEMENT_NODE:
13752 case XML_ATTRIBUTE_NODE:
13753 case XML_TEXT_NODE:
13754 case XML_CDATA_SECTION_NODE:
13755 case XML_ENTITY_REF_NODE:
13756 case XML_PI_NODE:
13757 case XML_COMMENT_NODE:
13758 case XML_DOCUMENT_NODE:
13759 case XML_HTML_DOCUMENT_NODE:
13760 break;
13761 default:
13762 return(XML_ERR_INTERNAL_ERROR);
13763
13764 }
13765 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13766 (node->type != XML_DOCUMENT_NODE) &&
13767 (node->type != XML_HTML_DOCUMENT_NODE))
13768 node = node->parent;
13769 if (node == NULL)
13770 return(XML_ERR_INTERNAL_ERROR);
13771 if (node->type == XML_ELEMENT_NODE)
13772 doc = node->doc;
13773 else
13774 doc = (xmlDocPtr) node;
13775 if (doc == NULL)
13776 return(XML_ERR_INTERNAL_ERROR);
13777
13778 /*
13779 * allocate a context and set-up everything not related to the
13780 * node position in the tree
13781 */
13782 if (doc->type == XML_DOCUMENT_NODE)
13783 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13784#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013785 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013786 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013787 /*
13788 * When parsing in context, it makes no sense to add implied
13789 * elements like html/body/etc...
13790 */
13791 options |= HTML_PARSE_NOIMPLIED;
13792 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013793#endif
13794 else
13795 return(XML_ERR_INTERNAL_ERROR);
13796
13797 if (ctxt == NULL)
13798 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013799
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013800 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013801 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13802 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13803 * we must wait until the last moment to free the original one.
13804 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013805 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013806 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013807 xmlDictFree(ctxt->dict);
13808 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013809 } else
13810 options |= XML_PARSE_NODICT;
13811
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013812 if (doc->encoding != NULL) {
13813 xmlCharEncodingHandlerPtr hdlr;
13814
13815 if (ctxt->encoding != NULL)
13816 xmlFree((xmlChar *) ctxt->encoding);
13817 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13818
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013819 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013820 if (hdlr != NULL) {
13821 xmlSwitchToEncoding(ctxt, hdlr);
13822 } else {
13823 return(XML_ERR_UNSUPPORTED_ENCODING);
13824 }
13825 }
13826
Daniel Veillard37334572008-07-31 08:20:02 +000013827 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013828 xmlDetectSAX2(ctxt);
13829 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013830 /* parsing in context, i.e. as within existing content */
13831 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013832
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013833 fake = xmlNewComment(NULL);
13834 if (fake == NULL) {
13835 xmlFreeParserCtxt(ctxt);
13836 return(XML_ERR_NO_MEMORY);
13837 }
13838 xmlAddChild(node, fake);
13839
Daniel Veillard29b17482004-08-16 00:39:03 +000013840 if (node->type == XML_ELEMENT_NODE) {
13841 nodePush(ctxt, node);
13842 /*
13843 * initialize the SAX2 namespaces stack
13844 */
13845 cur = node;
13846 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13847 xmlNsPtr ns = cur->nsDef;
13848 const xmlChar *iprefix, *ihref;
13849
13850 while (ns != NULL) {
13851 if (ctxt->dict) {
13852 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13853 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13854 } else {
13855 iprefix = ns->prefix;
13856 ihref = ns->href;
13857 }
13858
13859 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13860 nsPush(ctxt, iprefix, ihref);
13861 nsnr++;
13862 }
13863 ns = ns->next;
13864 }
13865 cur = cur->parent;
13866 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013867 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013868
13869 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13870 /*
13871 * ID/IDREF registration will be done in xmlValidateElement below
13872 */
13873 ctxt->loadsubset |= XML_SKIP_IDS;
13874 }
13875
Daniel Veillard499cc922006-01-18 17:22:35 +000013876#ifdef LIBXML_HTML_ENABLED
13877 if (doc->type == XML_HTML_DOCUMENT_NODE)
13878 __htmlParseContent(ctxt);
13879 else
13880#endif
13881 xmlParseContent(ctxt);
13882
Daniel Veillard29b17482004-08-16 00:39:03 +000013883 nsPop(ctxt, nsnr);
13884 if ((RAW == '<') && (NXT(1) == '/')) {
13885 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13886 } else if (RAW != 0) {
13887 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13888 }
13889 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13890 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13891 ctxt->wellFormed = 0;
13892 }
13893
13894 if (!ctxt->wellFormed) {
13895 if (ctxt->errNo == 0)
13896 ret = XML_ERR_INTERNAL_ERROR;
13897 else
13898 ret = (xmlParserErrors)ctxt->errNo;
13899 } else {
13900 ret = XML_ERR_OK;
13901 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013902
Daniel Veillard29b17482004-08-16 00:39:03 +000013903 /*
13904 * Return the newly created nodeset after unlinking it from
13905 * the pseudo sibling.
13906 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013907
Daniel Veillard29b17482004-08-16 00:39:03 +000013908 cur = fake->next;
13909 fake->next = NULL;
13910 node->last = fake;
13911
13912 if (cur != NULL) {
13913 cur->prev = NULL;
13914 }
13915
13916 *lst = cur;
13917
13918 while (cur != NULL) {
13919 cur->parent = NULL;
13920 cur = cur->next;
13921 }
13922
13923 xmlUnlinkNode(fake);
13924 xmlFreeNode(fake);
13925
13926
13927 if (ret != XML_ERR_OK) {
13928 xmlFreeNodeList(*lst);
13929 *lst = NULL;
13930 }
William M. Brackc3f81342004-10-03 01:22:44 +000013931
William M. Brackb7b54de2004-10-06 16:38:01 +000013932 if (doc->dict != NULL)
13933 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013934 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013935
Daniel Veillard29b17482004-08-16 00:39:03 +000013936 return(ret);
13937#else /* !SAX2 */
13938 return(XML_ERR_INTERNAL_ERROR);
13939#endif
13940}
13941
Daniel Veillard81273902003-09-30 00:43:48 +000013942#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013943/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013944 * xmlParseBalancedChunkMemoryRecover:
13945 * @doc: the document the chunk pertains to
13946 * @sax: the SAX handler bloc (possibly NULL)
13947 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13948 * @depth: Used for loop detection, use 0
13949 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13950 * @lst: the return value for the set of parsed nodes
13951 * @recover: return nodes even if the data is broken (use 0)
13952 *
13953 *
13954 * Parse a well-balanced chunk of an XML document
13955 * called by the parser
13956 * The allowed sequence for the Well Balanced Chunk is the one defined by
13957 * the content production in the XML grammar:
13958 *
13959 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13960 *
13961 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13962 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013963 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013964 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013965 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13966 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013967 */
13968int
13969xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013970 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013971 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013972 xmlParserCtxtPtr ctxt;
13973 xmlDocPtr newDoc;
13974 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013975 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013976 int size;
13977 int ret = 0;
13978
Daniel Veillard0161e632008-08-28 15:36:32 +000013979 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013980 return(XML_ERR_ENTITY_LOOP);
13981 }
13982
13983
Daniel Veillardcda96922001-08-21 10:56:31 +000013984 if (lst != NULL)
13985 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013986 if (string == NULL)
13987 return(-1);
13988
13989 size = xmlStrlen(string);
13990
13991 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13992 if (ctxt == NULL) return(-1);
13993 ctxt->userData = ctxt;
13994 if (sax != NULL) {
13995 oldsax = ctxt->sax;
13996 ctxt->sax = sax;
13997 if (user_data != NULL)
13998 ctxt->userData = user_data;
13999 }
14000 newDoc = xmlNewDoc(BAD_CAST "1.0");
14001 if (newDoc == NULL) {
14002 xmlFreeParserCtxt(ctxt);
14003 return(-1);
14004 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000014005 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014006 if ((doc != NULL) && (doc->dict != NULL)) {
14007 xmlDictFree(ctxt->dict);
14008 ctxt->dict = doc->dict;
14009 xmlDictReference(ctxt->dict);
14010 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
14011 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
14012 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
14013 ctxt->dictNames = 1;
14014 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000014015 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014016 }
Owen Taylor3473f882001-02-23 17:55:21 +000014017 if (doc != NULL) {
14018 newDoc->intSubset = doc->intSubset;
14019 newDoc->extSubset = doc->extSubset;
14020 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014021 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14022 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000014023 if (sax != NULL)
14024 ctxt->sax = oldsax;
14025 xmlFreeParserCtxt(ctxt);
14026 newDoc->intSubset = NULL;
14027 newDoc->extSubset = NULL;
14028 xmlFreeDoc(newDoc);
14029 return(-1);
14030 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014031 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14032 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000014033 if (doc == NULL) {
14034 ctxt->myDoc = newDoc;
14035 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000014036 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000014037 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000014038 /* Ensure that doc has XML spec namespace */
14039 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14040 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000014041 }
14042 ctxt->instate = XML_PARSER_CONTENT;
14043 ctxt->depth = depth;
14044
14045 /*
14046 * Doing validity checking on chunk doesn't make sense
14047 */
14048 ctxt->validate = 0;
14049 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014050 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014051
Daniel Veillardb39bc392002-10-26 19:29:51 +000014052 if ( doc != NULL ){
14053 content = doc->children;
14054 doc->children = NULL;
14055 xmlParseContent(ctxt);
14056 doc->children = content;
14057 }
14058 else {
14059 xmlParseContent(ctxt);
14060 }
Owen Taylor3473f882001-02-23 17:55:21 +000014061 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014062 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014063 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014064 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014065 }
14066 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014067 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014068 }
14069
14070 if (!ctxt->wellFormed) {
14071 if (ctxt->errNo == 0)
14072 ret = 1;
14073 else
14074 ret = ctxt->errNo;
14075 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000014076 ret = 0;
14077 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014078
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014079 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14080 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000014081
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014082 /*
14083 * Return the newly created nodeset after unlinking it from
14084 * they pseudo parent.
14085 */
14086 cur = newDoc->children->children;
14087 *lst = cur;
14088 while (cur != NULL) {
14089 xmlSetTreeDoc(cur, doc);
14090 cur->parent = NULL;
14091 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000014092 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014093 newDoc->children->children = NULL;
14094 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014095
14096 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000014097 ctxt->sax = oldsax;
14098 xmlFreeParserCtxt(ctxt);
14099 newDoc->intSubset = NULL;
14100 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000014101 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014102 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000014103
Owen Taylor3473f882001-02-23 17:55:21 +000014104 return(ret);
14105}
14106
14107/**
14108 * xmlSAXParseEntity:
14109 * @sax: the SAX handler block
14110 * @filename: the filename
14111 *
14112 * parse an XML external entity out of context and build a tree.
14113 * It use the given SAX function block to handle the parsing callback.
14114 * If sax is NULL, fallback to the default DOM tree building routines.
14115 *
14116 * [78] extParsedEnt ::= TextDecl? content
14117 *
14118 * This correspond to a "Well Balanced" chunk
14119 *
14120 * Returns the resulting document tree
14121 */
14122
14123xmlDocPtr
14124xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14125 xmlDocPtr ret;
14126 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014127
14128 ctxt = xmlCreateFileParserCtxt(filename);
14129 if (ctxt == NULL) {
14130 return(NULL);
14131 }
14132 if (sax != NULL) {
14133 if (ctxt->sax != NULL)
14134 xmlFree(ctxt->sax);
14135 ctxt->sax = sax;
14136 ctxt->userData = NULL;
14137 }
14138
Owen Taylor3473f882001-02-23 17:55:21 +000014139 xmlParseExtParsedEnt(ctxt);
14140
14141 if (ctxt->wellFormed)
14142 ret = ctxt->myDoc;
14143 else {
14144 ret = NULL;
14145 xmlFreeDoc(ctxt->myDoc);
14146 ctxt->myDoc = NULL;
14147 }
14148 if (sax != NULL)
14149 ctxt->sax = NULL;
14150 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000014151
Owen Taylor3473f882001-02-23 17:55:21 +000014152 return(ret);
14153}
14154
14155/**
14156 * xmlParseEntity:
14157 * @filename: the filename
14158 *
14159 * parse an XML external entity out of context and build a tree.
14160 *
14161 * [78] extParsedEnt ::= TextDecl? content
14162 *
14163 * This correspond to a "Well Balanced" chunk
14164 *
14165 * Returns the resulting document tree
14166 */
14167
14168xmlDocPtr
14169xmlParseEntity(const char *filename) {
14170 return(xmlSAXParseEntity(NULL, filename));
14171}
Daniel Veillard81273902003-09-30 00:43:48 +000014172#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014173
14174/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014175 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014176 * @URL: the entity URL
14177 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014178 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014179 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014180 *
14181 * Create a parser context for an external entity
14182 * Automatic support for ZLIB/Compress compressed document is provided
14183 * by default if found at compile-time.
14184 *
14185 * Returns the new parser context or NULL
14186 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014187static xmlParserCtxtPtr
14188xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14189 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014190 xmlParserCtxtPtr ctxt;
14191 xmlParserInputPtr inputStream;
14192 char *directory = NULL;
14193 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014194
Owen Taylor3473f882001-02-23 17:55:21 +000014195 ctxt = xmlNewParserCtxt();
14196 if (ctxt == NULL) {
14197 return(NULL);
14198 }
14199
Daniel Veillard48247b42009-07-10 16:12:46 +020014200 if (pctx != NULL) {
14201 ctxt->options = pctx->options;
14202 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014203 }
14204
Owen Taylor3473f882001-02-23 17:55:21 +000014205 uri = xmlBuildURI(URL, base);
14206
14207 if (uri == NULL) {
14208 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14209 if (inputStream == NULL) {
14210 xmlFreeParserCtxt(ctxt);
14211 return(NULL);
14212 }
14213
14214 inputPush(ctxt, inputStream);
14215
14216 if ((ctxt->directory == NULL) && (directory == NULL))
14217 directory = xmlParserGetDirectory((char *)URL);
14218 if ((ctxt->directory == NULL) && (directory != NULL))
14219 ctxt->directory = directory;
14220 } else {
14221 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14222 if (inputStream == NULL) {
14223 xmlFree(uri);
14224 xmlFreeParserCtxt(ctxt);
14225 return(NULL);
14226 }
14227
14228 inputPush(ctxt, inputStream);
14229
14230 if ((ctxt->directory == NULL) && (directory == NULL))
14231 directory = xmlParserGetDirectory((char *)uri);
14232 if ((ctxt->directory == NULL) && (directory != NULL))
14233 ctxt->directory = directory;
14234 xmlFree(uri);
14235 }
Owen Taylor3473f882001-02-23 17:55:21 +000014236 return(ctxt);
14237}
14238
Rob Richards9c0aa472009-03-26 18:10:19 +000014239/**
14240 * xmlCreateEntityParserCtxt:
14241 * @URL: the entity URL
14242 * @ID: the entity PUBLIC ID
14243 * @base: a possible base for the target URI
14244 *
14245 * Create a parser context for an external entity
14246 * Automatic support for ZLIB/Compress compressed document is provided
14247 * by default if found at compile-time.
14248 *
14249 * Returns the new parser context or NULL
14250 */
14251xmlParserCtxtPtr
14252xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14253 const xmlChar *base) {
14254 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14255
14256}
14257
Owen Taylor3473f882001-02-23 17:55:21 +000014258/************************************************************************
14259 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014260 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014261 * *
14262 ************************************************************************/
14263
14264/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014265 * xmlCreateURLParserCtxt:
14266 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014267 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014268 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014269 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014270 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014271 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014272 *
14273 * Returns the new parser context or NULL
14274 */
14275xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014276xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014277{
14278 xmlParserCtxtPtr ctxt;
14279 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014280 char *directory = NULL;
14281
Owen Taylor3473f882001-02-23 17:55:21 +000014282 ctxt = xmlNewParserCtxt();
14283 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014284 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014285 return(NULL);
14286 }
14287
Daniel Veillarddf292f72005-01-16 19:00:15 +000014288 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014289 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014290 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014291
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014292 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014293 if (inputStream == NULL) {
14294 xmlFreeParserCtxt(ctxt);
14295 return(NULL);
14296 }
14297
Owen Taylor3473f882001-02-23 17:55:21 +000014298 inputPush(ctxt, inputStream);
14299 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014300 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014301 if ((ctxt->directory == NULL) && (directory != NULL))
14302 ctxt->directory = directory;
14303
14304 return(ctxt);
14305}
14306
Daniel Veillard61b93382003-11-03 14:28:31 +000014307/**
14308 * xmlCreateFileParserCtxt:
14309 * @filename: the filename
14310 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014311 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014312 * Automatic support for ZLIB/Compress compressed document is provided
14313 * by default if found at compile-time.
14314 *
14315 * Returns the new parser context or NULL
14316 */
14317xmlParserCtxtPtr
14318xmlCreateFileParserCtxt(const char *filename)
14319{
14320 return(xmlCreateURLParserCtxt(filename, 0));
14321}
14322
Daniel Veillard81273902003-09-30 00:43:48 +000014323#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014324/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014325 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014326 * @sax: the SAX handler block
14327 * @filename: the filename
14328 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14329 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014330 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014331 *
14332 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14333 * compressed document is provided by default if found at compile-time.
14334 * It use the given SAX function block to handle the parsing callback.
14335 * If sax is NULL, fallback to the default DOM tree building routines.
14336 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014337 * User data (void *) is stored within the parser context in the
14338 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014339 *
Owen Taylor3473f882001-02-23 17:55:21 +000014340 * Returns the resulting document tree
14341 */
14342
14343xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014344xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14345 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014346 xmlDocPtr ret;
14347 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014348
Daniel Veillard635ef722001-10-29 11:48:19 +000014349 xmlInitParser();
14350
Owen Taylor3473f882001-02-23 17:55:21 +000014351 ctxt = xmlCreateFileParserCtxt(filename);
14352 if (ctxt == NULL) {
14353 return(NULL);
14354 }
14355 if (sax != NULL) {
14356 if (ctxt->sax != NULL)
14357 xmlFree(ctxt->sax);
14358 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014359 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014360 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014361 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014362 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014363 }
Owen Taylor3473f882001-02-23 17:55:21 +000014364
Daniel Veillard37d2d162008-03-14 10:54:00 +000014365 if (ctxt->directory == NULL)
14366 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014367
Daniel Veillarddad3f682002-11-17 16:47:27 +000014368 ctxt->recovery = recovery;
14369
Owen Taylor3473f882001-02-23 17:55:21 +000014370 xmlParseDocument(ctxt);
14371
William M. Brackc07329e2003-09-08 01:57:30 +000014372 if ((ctxt->wellFormed) || recovery) {
14373 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014374 if (ret != NULL) {
14375 if (ctxt->input->buf->compressed > 0)
14376 ret->compression = 9;
14377 else
14378 ret->compression = ctxt->input->buf->compressed;
14379 }
William M. Brackc07329e2003-09-08 01:57:30 +000014380 }
Owen Taylor3473f882001-02-23 17:55:21 +000014381 else {
14382 ret = NULL;
14383 xmlFreeDoc(ctxt->myDoc);
14384 ctxt->myDoc = NULL;
14385 }
14386 if (sax != NULL)
14387 ctxt->sax = NULL;
14388 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014389
Owen Taylor3473f882001-02-23 17:55:21 +000014390 return(ret);
14391}
14392
14393/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014394 * xmlSAXParseFile:
14395 * @sax: the SAX handler block
14396 * @filename: the filename
14397 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14398 * documents
14399 *
14400 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14401 * compressed document is provided by default if found at compile-time.
14402 * It use the given SAX function block to handle the parsing callback.
14403 * If sax is NULL, fallback to the default DOM tree building routines.
14404 *
14405 * Returns the resulting document tree
14406 */
14407
14408xmlDocPtr
14409xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14410 int recovery) {
14411 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14412}
14413
14414/**
Owen Taylor3473f882001-02-23 17:55:21 +000014415 * xmlRecoverDoc:
14416 * @cur: a pointer to an array of xmlChar
14417 *
14418 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014419 * In the case the document is not Well Formed, a attempt to build a
14420 * tree is tried anyway
14421 *
14422 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014423 */
14424
14425xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014426xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014427 return(xmlSAXParseDoc(NULL, cur, 1));
14428}
14429
14430/**
14431 * xmlParseFile:
14432 * @filename: the filename
14433 *
14434 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14435 * compressed document is provided by default if found at compile-time.
14436 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014437 * Returns the resulting document tree if the file was wellformed,
14438 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014439 */
14440
14441xmlDocPtr
14442xmlParseFile(const char *filename) {
14443 return(xmlSAXParseFile(NULL, filename, 0));
14444}
14445
14446/**
14447 * xmlRecoverFile:
14448 * @filename: the filename
14449 *
14450 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14451 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014452 * In the case the document is not Well Formed, it attempts to build
14453 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014454 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014455 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014456 */
14457
14458xmlDocPtr
14459xmlRecoverFile(const char *filename) {
14460 return(xmlSAXParseFile(NULL, filename, 1));
14461}
14462
14463
14464/**
14465 * xmlSetupParserForBuffer:
14466 * @ctxt: an XML parser context
14467 * @buffer: a xmlChar * buffer
14468 * @filename: a file name
14469 *
14470 * Setup the parser context to parse a new buffer; Clears any prior
14471 * contents from the parser context. The buffer parameter must not be
14472 * NULL, but the filename parameter can be
14473 */
14474void
14475xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14476 const char* filename)
14477{
14478 xmlParserInputPtr input;
14479
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014480 if ((ctxt == NULL) || (buffer == NULL))
14481 return;
14482
Owen Taylor3473f882001-02-23 17:55:21 +000014483 input = xmlNewInputStream(ctxt);
14484 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014485 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014486 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014487 return;
14488 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014489
Owen Taylor3473f882001-02-23 17:55:21 +000014490 xmlClearParserCtxt(ctxt);
14491 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014492 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014493 input->base = buffer;
14494 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014495 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014496 inputPush(ctxt, input);
14497}
14498
14499/**
14500 * xmlSAXUserParseFile:
14501 * @sax: a SAX handler
14502 * @user_data: The user data returned on SAX callbacks
14503 * @filename: a file name
14504 *
14505 * parse an XML file and call the given SAX handler routines.
14506 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014507 *
Owen Taylor3473f882001-02-23 17:55:21 +000014508 * Returns 0 in case of success or a error number otherwise
14509 */
14510int
14511xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14512 const char *filename) {
14513 int ret = 0;
14514 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014515
Owen Taylor3473f882001-02-23 17:55:21 +000014516 ctxt = xmlCreateFileParserCtxt(filename);
14517 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014518 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014519 xmlFree(ctxt->sax);
14520 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014521 xmlDetectSAX2(ctxt);
14522
Owen Taylor3473f882001-02-23 17:55:21 +000014523 if (user_data != NULL)
14524 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014525
Owen Taylor3473f882001-02-23 17:55:21 +000014526 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014527
Owen Taylor3473f882001-02-23 17:55:21 +000014528 if (ctxt->wellFormed)
14529 ret = 0;
14530 else {
14531 if (ctxt->errNo != 0)
14532 ret = ctxt->errNo;
14533 else
14534 ret = -1;
14535 }
14536 if (sax != NULL)
14537 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014538 if (ctxt->myDoc != NULL) {
14539 xmlFreeDoc(ctxt->myDoc);
14540 ctxt->myDoc = NULL;
14541 }
Owen Taylor3473f882001-02-23 17:55:21 +000014542 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014543
Owen Taylor3473f882001-02-23 17:55:21 +000014544 return ret;
14545}
Daniel Veillard81273902003-09-30 00:43:48 +000014546#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014547
14548/************************************************************************
14549 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014550 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014551 * *
14552 ************************************************************************/
14553
14554/**
14555 * xmlCreateMemoryParserCtxt:
14556 * @buffer: a pointer to a char array
14557 * @size: the size of the array
14558 *
14559 * Create a parser context for an XML in-memory document.
14560 *
14561 * Returns the new parser context or NULL
14562 */
14563xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014564xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014565 xmlParserCtxtPtr ctxt;
14566 xmlParserInputPtr input;
14567 xmlParserInputBufferPtr buf;
14568
14569 if (buffer == NULL)
14570 return(NULL);
14571 if (size <= 0)
14572 return(NULL);
14573
14574 ctxt = xmlNewParserCtxt();
14575 if (ctxt == NULL)
14576 return(NULL);
14577
Daniel Veillard53350552003-09-18 13:35:51 +000014578 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014579 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014580 if (buf == NULL) {
14581 xmlFreeParserCtxt(ctxt);
14582 return(NULL);
14583 }
Owen Taylor3473f882001-02-23 17:55:21 +000014584
14585 input = xmlNewInputStream(ctxt);
14586 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014587 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014588 xmlFreeParserCtxt(ctxt);
14589 return(NULL);
14590 }
14591
14592 input->filename = NULL;
14593 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014594 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014595
14596 inputPush(ctxt, input);
14597 return(ctxt);
14598}
14599
Daniel Veillard81273902003-09-30 00:43:48 +000014600#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014601/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014602 * xmlSAXParseMemoryWithData:
14603 * @sax: the SAX handler block
14604 * @buffer: an pointer to a char array
14605 * @size: the size of the array
14606 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14607 * documents
14608 * @data: the userdata
14609 *
14610 * parse an XML in-memory block and use the given SAX function block
14611 * to handle the parsing callback. If sax is NULL, fallback to the default
14612 * DOM tree building routines.
14613 *
14614 * User data (void *) is stored within the parser context in the
14615 * context's _private member, so it is available nearly everywhere in libxml
14616 *
14617 * Returns the resulting document tree
14618 */
14619
14620xmlDocPtr
14621xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14622 int size, int recovery, void *data) {
14623 xmlDocPtr ret;
14624 xmlParserCtxtPtr ctxt;
14625
Daniel Veillardab2a7632009-07-09 08:45:03 +020014626 xmlInitParser();
14627
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014628 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14629 if (ctxt == NULL) return(NULL);
14630 if (sax != NULL) {
14631 if (ctxt->sax != NULL)
14632 xmlFree(ctxt->sax);
14633 ctxt->sax = sax;
14634 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014635 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014636 if (data!=NULL) {
14637 ctxt->_private=data;
14638 }
14639
Daniel Veillardadba5f12003-04-04 16:09:01 +000014640 ctxt->recovery = recovery;
14641
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014642 xmlParseDocument(ctxt);
14643
14644 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14645 else {
14646 ret = NULL;
14647 xmlFreeDoc(ctxt->myDoc);
14648 ctxt->myDoc = NULL;
14649 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014650 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014651 ctxt->sax = NULL;
14652 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014653
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014654 return(ret);
14655}
14656
14657/**
Owen Taylor3473f882001-02-23 17:55:21 +000014658 * xmlSAXParseMemory:
14659 * @sax: the SAX handler block
14660 * @buffer: an pointer to a char array
14661 * @size: the size of the array
14662 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14663 * documents
14664 *
14665 * parse an XML in-memory block and use the given SAX function block
14666 * to handle the parsing callback. If sax is NULL, fallback to the default
14667 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014668 *
Owen Taylor3473f882001-02-23 17:55:21 +000014669 * Returns the resulting document tree
14670 */
14671xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014672xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14673 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014674 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014675}
14676
14677/**
14678 * xmlParseMemory:
14679 * @buffer: an pointer to a char array
14680 * @size: the size of the array
14681 *
14682 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014683 *
Owen Taylor3473f882001-02-23 17:55:21 +000014684 * Returns the resulting document tree
14685 */
14686
Daniel Veillard50822cb2001-07-26 20:05:51 +000014687xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014688 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14689}
14690
14691/**
14692 * xmlRecoverMemory:
14693 * @buffer: an pointer to a char array
14694 * @size: the size of the array
14695 *
14696 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014697 * In the case the document is not Well Formed, an attempt to
14698 * build a tree is tried anyway
14699 *
14700 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014701 */
14702
Daniel Veillard50822cb2001-07-26 20:05:51 +000014703xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014704 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14705}
14706
14707/**
14708 * xmlSAXUserParseMemory:
14709 * @sax: a SAX handler
14710 * @user_data: The user data returned on SAX callbacks
14711 * @buffer: an in-memory XML document input
14712 * @size: the length of the XML document in bytes
14713 *
14714 * A better SAX parsing routine.
14715 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014716 *
Owen Taylor3473f882001-02-23 17:55:21 +000014717 * Returns 0 in case of success or a error number otherwise
14718 */
14719int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014720 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014721 int ret = 0;
14722 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014723
14724 xmlInitParser();
14725
Owen Taylor3473f882001-02-23 17:55:21 +000014726 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14727 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014728 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14729 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014730 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014731 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014732
Daniel Veillard30211a02001-04-26 09:33:18 +000014733 if (user_data != NULL)
14734 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014735
Owen Taylor3473f882001-02-23 17:55:21 +000014736 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014737
Owen Taylor3473f882001-02-23 17:55:21 +000014738 if (ctxt->wellFormed)
14739 ret = 0;
14740 else {
14741 if (ctxt->errNo != 0)
14742 ret = ctxt->errNo;
14743 else
14744 ret = -1;
14745 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014746 if (sax != NULL)
14747 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014748 if (ctxt->myDoc != NULL) {
14749 xmlFreeDoc(ctxt->myDoc);
14750 ctxt->myDoc = NULL;
14751 }
Owen Taylor3473f882001-02-23 17:55:21 +000014752 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014753
Owen Taylor3473f882001-02-23 17:55:21 +000014754 return ret;
14755}
Daniel Veillard81273902003-09-30 00:43:48 +000014756#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014757
14758/**
14759 * xmlCreateDocParserCtxt:
14760 * @cur: a pointer to an array of xmlChar
14761 *
14762 * Creates a parser context for an XML in-memory document.
14763 *
14764 * Returns the new parser context or NULL
14765 */
14766xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014767xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014768 int len;
14769
14770 if (cur == NULL)
14771 return(NULL);
14772 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014773 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014774}
14775
Daniel Veillard81273902003-09-30 00:43:48 +000014776#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014777/**
14778 * xmlSAXParseDoc:
14779 * @sax: the SAX handler block
14780 * @cur: a pointer to an array of xmlChar
14781 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14782 * documents
14783 *
14784 * parse an XML in-memory document and build a tree.
14785 * It use the given SAX function block to handle the parsing callback.
14786 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014787 *
Owen Taylor3473f882001-02-23 17:55:21 +000014788 * Returns the resulting document tree
14789 */
14790
14791xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014792xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014793 xmlDocPtr ret;
14794 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014795 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014796
Daniel Veillard38936062004-11-04 17:45:11 +000014797 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014798
14799
14800 ctxt = xmlCreateDocParserCtxt(cur);
14801 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014802 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014803 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014804 ctxt->sax = sax;
14805 ctxt->userData = NULL;
14806 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014807 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014808
14809 xmlParseDocument(ctxt);
14810 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14811 else {
14812 ret = NULL;
14813 xmlFreeDoc(ctxt->myDoc);
14814 ctxt->myDoc = NULL;
14815 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014816 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014817 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014818 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014819
Owen Taylor3473f882001-02-23 17:55:21 +000014820 return(ret);
14821}
14822
14823/**
14824 * xmlParseDoc:
14825 * @cur: a pointer to an array of xmlChar
14826 *
14827 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014828 *
Owen Taylor3473f882001-02-23 17:55:21 +000014829 * Returns the resulting document tree
14830 */
14831
14832xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014833xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014834 return(xmlSAXParseDoc(NULL, cur, 0));
14835}
Daniel Veillard81273902003-09-30 00:43:48 +000014836#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014837
Daniel Veillard81273902003-09-30 00:43:48 +000014838#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014839/************************************************************************
14840 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014841 * Specific function to keep track of entities references *
14842 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014843 * *
14844 ************************************************************************/
14845
14846static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14847
14848/**
14849 * xmlAddEntityReference:
14850 * @ent : A valid entity
14851 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014852 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014853 *
14854 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14855 */
14856static void
14857xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14858 xmlNodePtr lastNode)
14859{
14860 if (xmlEntityRefFunc != NULL) {
14861 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14862 }
14863}
14864
14865
14866/**
14867 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014868 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014869 *
14870 * Set the function to call call back when a xml reference has been made
14871 */
14872void
14873xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14874{
14875 xmlEntityRefFunc = func;
14876}
Daniel Veillard81273902003-09-30 00:43:48 +000014877#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014878
14879/************************************************************************
14880 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014881 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014882 * *
14883 ************************************************************************/
14884
14885#ifdef LIBXML_XPATH_ENABLED
14886#include <libxml/xpath.h>
14887#endif
14888
Daniel Veillardffa3c742005-07-21 13:24:09 +000014889extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014890static int xmlParserInitialized = 0;
14891
14892/**
14893 * xmlInitParser:
14894 *
14895 * Initialization function for the XML parser.
14896 * This is not reentrant. Call once before processing in case of
14897 * use in multithreaded programs.
14898 */
14899
14900void
14901xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014902 if (xmlParserInitialized != 0)
14903 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014904
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014905#ifdef LIBXML_THREAD_ENABLED
14906 __xmlGlobalInitMutexLock();
14907 if (xmlParserInitialized == 0) {
14908#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014909 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014910 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014911 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14912 (xmlGenericError == NULL))
14913 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014914 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014915 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014916 xmlInitCharEncodingHandlers();
14917 xmlDefaultSAXHandlerInit();
14918 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014919#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014920 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014921#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014922#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014923 htmlInitAutoClose();
14924 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014925#endif
14926#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014927 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014928#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014929 xmlParserInitialized = 1;
14930#ifdef LIBXML_THREAD_ENABLED
14931 }
14932 __xmlGlobalInitMutexUnlock();
14933#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014934}
14935
14936/**
14937 * xmlCleanupParser:
14938 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014939 * This function name is somewhat misleading. It does not clean up
14940 * parser state, it cleans up memory allocated by the library itself.
14941 * It is a cleanup function for the XML library. It tries to reclaim all
14942 * related global memory allocated for the library processing.
14943 * It doesn't deallocate any document related memory. One should
14944 * call xmlCleanupParser() only when the process has finished using
14945 * the library and all XML/HTML documents built with it.
14946 * See also xmlInitParser() which has the opposite function of preparing
14947 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014948 *
14949 * WARNING: if your application is multithreaded or has plugin support
14950 * calling this may crash the application if another thread or
14951 * a plugin is still using libxml2. It's sometimes very hard to
14952 * guess if libxml2 is in use in the application, some libraries
14953 * or plugins may use it without notice. In case of doubt abstain
14954 * from calling this function or do it just before calling exit()
14955 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014956 */
14957
14958void
14959xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014960 if (!xmlParserInitialized)
14961 return;
14962
Owen Taylor3473f882001-02-23 17:55:21 +000014963 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014964#ifdef LIBXML_CATALOG_ENABLED
14965 xmlCatalogCleanup();
14966#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014967 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014968 xmlCleanupInputCallbacks();
14969#ifdef LIBXML_OUTPUT_ENABLED
14970 xmlCleanupOutputCallbacks();
14971#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014972#ifdef LIBXML_SCHEMAS_ENABLED
14973 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014974 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014975#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014976 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014977 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014978 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014979 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014980 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014981}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014982
14983/************************************************************************
14984 * *
14985 * New set (2.6.0) of simpler and more flexible APIs *
14986 * *
14987 ************************************************************************/
14988
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is never dictionary-owned.
 * A NULL @str is ignored.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe inside unbraced if/else). Callers must
 * terminate the invocation with a semicolon. Note that @str is evaluated
 * more than once, so it must not have side effects.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
15000
15001/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015002 * xmlCtxtReset:
15003 * @ctxt: an XML parser context
15004 *
15005 * Reset a parser context
15006 */
15007void
15008xmlCtxtReset(xmlParserCtxtPtr ctxt)
15009{
15010 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015011 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015012
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015013 if (ctxt == NULL)
15014 return;
15015
15016 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015017
15018 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15019 xmlFreeInputStream(input);
15020 }
15021 ctxt->inputNr = 0;
15022 ctxt->input = NULL;
15023
15024 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000015025 if (ctxt->spaceTab != NULL) {
15026 ctxt->spaceTab[0] = -1;
15027 ctxt->space = &ctxt->spaceTab[0];
15028 } else {
15029 ctxt->space = NULL;
15030 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015031
15032
15033 ctxt->nodeNr = 0;
15034 ctxt->node = NULL;
15035
15036 ctxt->nameNr = 0;
15037 ctxt->name = NULL;
15038
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015039 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015040 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015041 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015042 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015043 DICT_FREE(ctxt->directory);
15044 ctxt->directory = NULL;
15045 DICT_FREE(ctxt->extSubURI);
15046 ctxt->extSubURI = NULL;
15047 DICT_FREE(ctxt->extSubSystem);
15048 ctxt->extSubSystem = NULL;
15049 if (ctxt->myDoc != NULL)
15050 xmlFreeDoc(ctxt->myDoc);
15051 ctxt->myDoc = NULL;
15052
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015053 ctxt->standalone = -1;
15054 ctxt->hasExternalSubset = 0;
15055 ctxt->hasPErefs = 0;
15056 ctxt->html = 0;
15057 ctxt->external = 0;
15058 ctxt->instate = XML_PARSER_START;
15059 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015060
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015061 ctxt->wellFormed = 1;
15062 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000015063 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015064 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015065#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015066 ctxt->vctxt.userData = ctxt;
15067 ctxt->vctxt.error = xmlParserValidityError;
15068 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015069#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015070 ctxt->record_info = 0;
15071 ctxt->nbChars = 0;
15072 ctxt->checkIndex = 0;
15073 ctxt->inSubset = 0;
15074 ctxt->errNo = XML_ERR_OK;
15075 ctxt->depth = 0;
15076 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15077 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000015078 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000015079 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080015080 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015081 xmlInitNodeInfoSeq(&ctxt->node_seq);
15082
15083 if (ctxt->attsDefault != NULL) {
15084 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15085 ctxt->attsDefault = NULL;
15086 }
15087 if (ctxt->attsSpecial != NULL) {
15088 xmlHashFree(ctxt->attsSpecial, NULL);
15089 ctxt->attsSpecial = NULL;
15090 }
15091
Daniel Veillard4432df22003-09-28 18:58:27 +000015092#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015093 if (ctxt->catalogs != NULL)
15094 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000015095#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000015096 if (ctxt->lastError.code != XML_ERR_OK)
15097 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015098}
15099
15100/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015101 * xmlCtxtResetPush:
15102 * @ctxt: an XML parser context
15103 * @chunk: a pointer to an array of chars
15104 * @size: number of chars in the array
15105 * @filename: an optional file name or URI
15106 * @encoding: the document encoding, or NULL
15107 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015108 * Reset a push parser context
15109 *
15110 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015111 */
15112int
15113xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15114 int size, const char *filename, const char *encoding)
15115{
15116 xmlParserInputPtr inputStream;
15117 xmlParserInputBufferPtr buf;
15118 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15119
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015120 if (ctxt == NULL)
15121 return(1);
15122
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015123 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15124 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15125
15126 buf = xmlAllocParserInputBuffer(enc);
15127 if (buf == NULL)
15128 return(1);
15129
15130 if (ctxt == NULL) {
15131 xmlFreeParserInputBuffer(buf);
15132 return(1);
15133 }
15134
15135 xmlCtxtReset(ctxt);
15136
15137 if (ctxt->pushTab == NULL) {
15138 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15139 sizeof(xmlChar *));
15140 if (ctxt->pushTab == NULL) {
15141 xmlErrMemory(ctxt, NULL);
15142 xmlFreeParserInputBuffer(buf);
15143 return(1);
15144 }
15145 }
15146
15147 if (filename == NULL) {
15148 ctxt->directory = NULL;
15149 } else {
15150 ctxt->directory = xmlParserGetDirectory(filename);
15151 }
15152
15153 inputStream = xmlNewInputStream(ctxt);
15154 if (inputStream == NULL) {
15155 xmlFreeParserInputBuffer(buf);
15156 return(1);
15157 }
15158
15159 if (filename == NULL)
15160 inputStream->filename = NULL;
15161 else
15162 inputStream->filename = (char *)
15163 xmlCanonicPath((const xmlChar *) filename);
15164 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080015165 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015166
15167 inputPush(ctxt, inputStream);
15168
15169 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15170 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015171 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15172 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015173
15174 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15175
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015176 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015177#ifdef DEBUG_PUSH
15178 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15179#endif
15180 }
15181
15182 if (encoding != NULL) {
15183 xmlCharEncodingHandlerPtr hdlr;
15184
Daniel Veillard37334572008-07-31 08:20:02 +000015185 if (ctxt->encoding != NULL)
15186 xmlFree((xmlChar *) ctxt->encoding);
15187 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15188
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015189 hdlr = xmlFindCharEncodingHandler(encoding);
15190 if (hdlr != NULL) {
15191 xmlSwitchToEncoding(ctxt, hdlr);
15192 } else {
15193 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15194 "Unsupported encoding %s\n", BAD_CAST encoding);
15195 }
15196 } else if (enc != XML_CHAR_ENCODING_NONE) {
15197 xmlSwitchEncoding(ctxt, enc);
15198 }
15199
15200 return(0);
15201}
15202
Daniel Veillard37334572008-07-31 08:20:02 +000015203
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015204/**
Daniel Veillard37334572008-07-31 08:20:02 +000015205 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015206 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015207 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015208 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015209 *
15210 * Applies the options to the parser context
15211 *
15212 * Returns 0 in case of success, the set of unknown or unimplemented options
15213 * in case of error.
15214 */
Daniel Veillard37334572008-07-31 08:20:02 +000015215static int
15216xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015217{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015218 if (ctxt == NULL)
15219 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015220 if (encoding != NULL) {
15221 if (ctxt->encoding != NULL)
15222 xmlFree((xmlChar *) ctxt->encoding);
15223 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15224 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015225 if (options & XML_PARSE_RECOVER) {
15226 ctxt->recovery = 1;
15227 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015228 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015229 } else
15230 ctxt->recovery = 0;
15231 if (options & XML_PARSE_DTDLOAD) {
15232 ctxt->loadsubset = XML_DETECT_IDS;
15233 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015234 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015235 } else
15236 ctxt->loadsubset = 0;
15237 if (options & XML_PARSE_DTDATTR) {
15238 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15239 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015240 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015241 }
15242 if (options & XML_PARSE_NOENT) {
15243 ctxt->replaceEntities = 1;
15244 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15245 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015246 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247 } else
15248 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015249 if (options & XML_PARSE_PEDANTIC) {
15250 ctxt->pedantic = 1;
15251 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015252 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015253 } else
15254 ctxt->pedantic = 0;
15255 if (options & XML_PARSE_NOBLANKS) {
15256 ctxt->keepBlanks = 0;
15257 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15258 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015259 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015260 } else
15261 ctxt->keepBlanks = 1;
15262 if (options & XML_PARSE_DTDVALID) {
15263 ctxt->validate = 1;
15264 if (options & XML_PARSE_NOWARNING)
15265 ctxt->vctxt.warning = NULL;
15266 if (options & XML_PARSE_NOERROR)
15267 ctxt->vctxt.error = NULL;
15268 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015269 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015270 } else
15271 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015272 if (options & XML_PARSE_NOWARNING) {
15273 ctxt->sax->warning = NULL;
15274 options -= XML_PARSE_NOWARNING;
15275 }
15276 if (options & XML_PARSE_NOERROR) {
15277 ctxt->sax->error = NULL;
15278 ctxt->sax->fatalError = NULL;
15279 options -= XML_PARSE_NOERROR;
15280 }
Daniel Veillard81273902003-09-30 00:43:48 +000015281#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015282 if (options & XML_PARSE_SAX1) {
15283 ctxt->sax->startElement = xmlSAX2StartElement;
15284 ctxt->sax->endElement = xmlSAX2EndElement;
15285 ctxt->sax->startElementNs = NULL;
15286 ctxt->sax->endElementNs = NULL;
15287 ctxt->sax->initialized = 1;
15288 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015289 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015290 }
Daniel Veillard81273902003-09-30 00:43:48 +000015291#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015292 if (options & XML_PARSE_NODICT) {
15293 ctxt->dictNames = 0;
15294 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015295 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015296 } else {
15297 ctxt->dictNames = 1;
15298 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015299 if (options & XML_PARSE_NOCDATA) {
15300 ctxt->sax->cdataBlock = NULL;
15301 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015302 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015303 }
15304 if (options & XML_PARSE_NSCLEAN) {
15305 ctxt->options |= XML_PARSE_NSCLEAN;
15306 options -= XML_PARSE_NSCLEAN;
15307 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015308 if (options & XML_PARSE_NONET) {
15309 ctxt->options |= XML_PARSE_NONET;
15310 options -= XML_PARSE_NONET;
15311 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015312 if (options & XML_PARSE_COMPACT) {
15313 ctxt->options |= XML_PARSE_COMPACT;
15314 options -= XML_PARSE_COMPACT;
15315 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015316 if (options & XML_PARSE_OLD10) {
15317 ctxt->options |= XML_PARSE_OLD10;
15318 options -= XML_PARSE_OLD10;
15319 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015320 if (options & XML_PARSE_NOBASEFIX) {
15321 ctxt->options |= XML_PARSE_NOBASEFIX;
15322 options -= XML_PARSE_NOBASEFIX;
15323 }
15324 if (options & XML_PARSE_HUGE) {
15325 ctxt->options |= XML_PARSE_HUGE;
15326 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015327 if (ctxt->dict != NULL)
15328 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015329 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015330 if (options & XML_PARSE_OLDSAX) {
15331 ctxt->options |= XML_PARSE_OLDSAX;
15332 options -= XML_PARSE_OLDSAX;
15333 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015334 if (options & XML_PARSE_IGNORE_ENC) {
15335 ctxt->options |= XML_PARSE_IGNORE_ENC;
15336 options -= XML_PARSE_IGNORE_ENC;
15337 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015338 if (options & XML_PARSE_BIG_LINES) {
15339 ctxt->options |= XML_PARSE_BIG_LINES;
15340 options -= XML_PARSE_BIG_LINES;
15341 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015342 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015343 return (options);
15344}
15345
15346/**
Daniel Veillard37334572008-07-31 08:20:02 +000015347 * xmlCtxtUseOptions:
15348 * @ctxt: an XML parser context
15349 * @options: a combination of xmlParserOption
15350 *
15351 * Applies the options to the parser context
15352 *
15353 * Returns 0 in case of success, the set of unknown or unimplemented options
15354 * in case of error.
15355 */
15356int
15357xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15358{
15359 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15360}
15361
15362/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015363 * xmlDoRead:
15364 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015365 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015366 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015367 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015368 * @reuse: keep the context for reuse
15369 *
15370 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015371 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015372 * Returns the resulting document tree or NULL
15373 */
15374static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015375xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15376 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015377{
15378 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015379
15380 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015381 if (encoding != NULL) {
15382 xmlCharEncodingHandlerPtr hdlr;
15383
15384 hdlr = xmlFindCharEncodingHandler(encoding);
15385 if (hdlr != NULL)
15386 xmlSwitchToEncoding(ctxt, hdlr);
15387 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015388 if ((URL != NULL) && (ctxt->input != NULL) &&
15389 (ctxt->input->filename == NULL))
15390 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015391 xmlParseDocument(ctxt);
15392 if ((ctxt->wellFormed) || ctxt->recovery)
15393 ret = ctxt->myDoc;
15394 else {
15395 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015396 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015397 xmlFreeDoc(ctxt->myDoc);
15398 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015399 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015400 ctxt->myDoc = NULL;
15401 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015402 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015403 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015404
15405 return (ret);
15406}
15407
15408/**
15409 * xmlReadDoc:
15410 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015411 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015412 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015413 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015414 *
15415 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015416 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015417 * Returns the resulting document tree
15418 */
15419xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015420xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015421{
15422 xmlParserCtxtPtr ctxt;
15423
15424 if (cur == NULL)
15425 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015426 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015427
15428 ctxt = xmlCreateDocParserCtxt(cur);
15429 if (ctxt == NULL)
15430 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015431 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015432}
15433
15434/**
15435 * xmlReadFile:
15436 * @filename: a file or URL
15437 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015438 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015439 *
15440 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015441 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015442 * Returns the resulting document tree
15443 */
15444xmlDocPtr
15445xmlReadFile(const char *filename, const char *encoding, int options)
15446{
15447 xmlParserCtxtPtr ctxt;
15448
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015449 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015450 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015451 if (ctxt == NULL)
15452 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015453 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015454}
15455
15456/**
15457 * xmlReadMemory:
15458 * @buffer: a pointer to a char array
15459 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015460 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015461 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015462 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015463 *
15464 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015465 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015466 * Returns the resulting document tree
15467 */
15468xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015469xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015470{
15471 xmlParserCtxtPtr ctxt;
15472
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015473 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015474 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15475 if (ctxt == NULL)
15476 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015477 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015478}
15479
15480/**
15481 * xmlReadFd:
15482 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015483 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015484 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015485 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015486 *
15487 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015488 * NOTE that the file descriptor will not be closed when the
15489 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015490 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015491 * Returns the resulting document tree
15492 */
15493xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015494xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015495{
15496 xmlParserCtxtPtr ctxt;
15497 xmlParserInputBufferPtr input;
15498 xmlParserInputPtr stream;
15499
15500 if (fd < 0)
15501 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015502 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015503
15504 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15505 if (input == NULL)
15506 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015507 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015508 ctxt = xmlNewParserCtxt();
15509 if (ctxt == NULL) {
15510 xmlFreeParserInputBuffer(input);
15511 return (NULL);
15512 }
15513 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15514 if (stream == NULL) {
15515 xmlFreeParserInputBuffer(input);
15516 xmlFreeParserCtxt(ctxt);
15517 return (NULL);
15518 }
15519 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015520 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015521}
15522
15523/**
15524 * xmlReadIO:
15525 * @ioread: an I/O read function
15526 * @ioclose: an I/O close function
15527 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015528 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015529 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015530 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015531 *
15532 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015533 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015534 * Returns the resulting document tree
15535 */
15536xmlDocPtr
15537xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015538 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015539{
15540 xmlParserCtxtPtr ctxt;
15541 xmlParserInputBufferPtr input;
15542 xmlParserInputPtr stream;
15543
15544 if (ioread == NULL)
15545 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015546 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015547
15548 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15549 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015550 if (input == NULL) {
15551 if (ioclose != NULL)
15552 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015553 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015554 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015555 ctxt = xmlNewParserCtxt();
15556 if (ctxt == NULL) {
15557 xmlFreeParserInputBuffer(input);
15558 return (NULL);
15559 }
15560 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15561 if (stream == NULL) {
15562 xmlFreeParserInputBuffer(input);
15563 xmlFreeParserCtxt(ctxt);
15564 return (NULL);
15565 }
15566 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015567 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015568}
15569
15570/**
15571 * xmlCtxtReadDoc:
15572 * @ctxt: an XML parser context
15573 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015574 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015575 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015576 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015577 *
15578 * parse an XML in-memory document and build a tree.
15579 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015580 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015581 * Returns the resulting document tree
15582 */
15583xmlDocPtr
15584xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015585 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015586{
15587 xmlParserInputPtr stream;
15588
15589 if (cur == NULL)
15590 return (NULL);
15591 if (ctxt == NULL)
15592 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015593 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015594
15595 xmlCtxtReset(ctxt);
15596
15597 stream = xmlNewStringInputStream(ctxt, cur);
15598 if (stream == NULL) {
15599 return (NULL);
15600 }
15601 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015602 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015603}
15604
15605/**
15606 * xmlCtxtReadFile:
15607 * @ctxt: an XML parser context
15608 * @filename: a file or URL
15609 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015610 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015611 *
15612 * parse an XML file from the filesystem or the network.
15613 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015614 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015615 * Returns the resulting document tree
15616 */
15617xmlDocPtr
15618xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15619 const char *encoding, int options)
15620{
15621 xmlParserInputPtr stream;
15622
15623 if (filename == NULL)
15624 return (NULL);
15625 if (ctxt == NULL)
15626 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015627 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015628
15629 xmlCtxtReset(ctxt);
15630
Daniel Veillard29614c72004-11-26 10:47:26 +000015631 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015632 if (stream == NULL) {
15633 return (NULL);
15634 }
15635 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015636 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015637}
15638
15639/**
15640 * xmlCtxtReadMemory:
15641 * @ctxt: an XML parser context
15642 * @buffer: a pointer to a char array
15643 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015644 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015645 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015646 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015647 *
15648 * parse an XML in-memory document and build a tree.
15649 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015650 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015651 * Returns the resulting document tree
15652 */
15653xmlDocPtr
15654xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015655 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015656{
15657 xmlParserInputBufferPtr input;
15658 xmlParserInputPtr stream;
15659
15660 if (ctxt == NULL)
15661 return (NULL);
15662 if (buffer == NULL)
15663 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015664 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015665
15666 xmlCtxtReset(ctxt);
15667
15668 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15669 if (input == NULL) {
15670 return(NULL);
15671 }
15672
15673 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15674 if (stream == NULL) {
15675 xmlFreeParserInputBuffer(input);
15676 return(NULL);
15677 }
15678
15679 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015680 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015681}
15682
15683/**
15684 * xmlCtxtReadFd:
15685 * @ctxt: an XML parser context
15686 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015687 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015688 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015689 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015690 *
15691 * parse an XML from a file descriptor and build a tree.
15692 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015693 * NOTE that the file descriptor will not be closed when the
15694 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015695 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015696 * Returns the resulting document tree
15697 */
15698xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015699xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15700 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015701{
15702 xmlParserInputBufferPtr input;
15703 xmlParserInputPtr stream;
15704
15705 if (fd < 0)
15706 return (NULL);
15707 if (ctxt == NULL)
15708 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015709 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015710
15711 xmlCtxtReset(ctxt);
15712
15713
15714 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15715 if (input == NULL)
15716 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015717 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015718 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15719 if (stream == NULL) {
15720 xmlFreeParserInputBuffer(input);
15721 return (NULL);
15722 }
15723 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015724 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015725}
15726
15727/**
15728 * xmlCtxtReadIO:
15729 * @ctxt: an XML parser context
15730 * @ioread: an I/O read function
15731 * @ioclose: an I/O close function
15732 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015733 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015734 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015735 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015736 *
15737 * parse an XML document from I/O functions and source and build a tree.
15738 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015739 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015740 * Returns the resulting document tree
15741 */
15742xmlDocPtr
15743xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15744 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015745 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015746 const char *encoding, int options)
15747{
15748 xmlParserInputBufferPtr input;
15749 xmlParserInputPtr stream;
15750
15751 if (ioread == NULL)
15752 return (NULL);
15753 if (ctxt == NULL)
15754 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015755 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015756
15757 xmlCtxtReset(ctxt);
15758
15759 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15760 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015761 if (input == NULL) {
15762 if (ioclose != NULL)
15763 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015764 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015765 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015766 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15767 if (stream == NULL) {
15768 xmlFreeParserInputBuffer(input);
15769 return (NULL);
15770 }
15771 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015772 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015773}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015774
15775#define bottom_parser
15776#include "elfgcchack.h"