blob: ab007aaa048b5b3f3e54c02c3d49b762bdd18b48 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080020 * different ranges of character are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080097static void xmlHaltParser(xmlParserCtxtPtr ctxt);
98
Daniel Veillard0161e632008-08-28 15:36:32 +000099/************************************************************************
100 * *
101 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
102 * *
103 ************************************************************************/
104
105#define XML_PARSER_BIG_ENTITY 1000
106#define XML_PARSER_LOT_ENTITY 5000
107
108/*
109 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110 * replacement over the size in byte of the input indicates that you have
111 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
112 * replacement per byte of input.
113 */
114#define XML_PARSER_NON_LINEAR 10
115
116/*
117 * xmlParserEntityCheck
118 *
119 * Function to check non-linear entity expansion behaviour
120 * This is here to detect and stop exponential linear entity expansion
121 * This is not a limitation of the parser but a safety
122 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123 * parser option.
124 */
125static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800126xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800127 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000128{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800129 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000130
131 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
132 return (0);
133 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
134 return (1);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800135
136 /*
137 * This may look absurd but is needed to detect
138 * entities problems
139 */
140 if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
141 (ent->content != NULL) && (ent->checked == 0)) {
142 unsigned long oldnbent = ctxt->nbentities;
143 xmlChar *rep;
144
145 ent->checked = 1;
146
147 rep = xmlStringDecodeEntities(ctxt, ent->content,
148 XML_SUBSTITUTE_REF, 0, 0, 0);
149
150 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
151 if (rep != NULL) {
152 if (xmlStrchr(rep, '<'))
153 ent->checked |= 1;
154 xmlFree(rep);
155 rep = NULL;
156 }
157 }
Daniel Veillard23f05e02013-02-19 10:21:49 +0800158 if (replacement != 0) {
159 if (replacement < XML_MAX_TEXT_LENGTH)
160 return(0);
161
162 /*
163 * If the volume of entity copy reaches 10 times the
164 * amount of parsed data and over the large text threshold
165 * then that's very likely to be an abuse.
166 */
167 if (ctxt->input != NULL) {
168 consumed = ctxt->input->consumed +
169 (ctxt->input->cur - ctxt->input->base);
170 }
171 consumed += ctxt->sizeentities;
172
173 if (replacement < XML_PARSER_NON_LINEAR * consumed)
174 return(0);
175 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000176 /*
177 * Do the check based on the replacement size of the entity
178 */
179 if (size < XML_PARSER_BIG_ENTITY)
180 return(0);
181
182 /*
183 * A limit on the amount of text data reasonably used
184 */
185 if (ctxt->input != NULL) {
186 consumed = ctxt->input->consumed +
187 (ctxt->input->cur - ctxt->input->base);
188 }
189 consumed += ctxt->sizeentities;
190
191 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
192 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
193 return (0);
194 } else if (ent != NULL) {
195 /*
196 * use the number of parsed entities in the replacement
197 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800198 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000199
200 /*
201 * The amount of data parsed counting entities size only once
202 */
203 if (ctxt->input != NULL) {
204 consumed = ctxt->input->consumed +
205 (ctxt->input->cur - ctxt->input->base);
206 }
207 consumed += ctxt->sizeentities;
208
209 /*
210 * Check the density of entities for the amount of data
211 * knowing an entity reference will take at least 3 bytes
212 */
213 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
214 return (0);
215 } else {
216 /*
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800217 * strange we got no data for checking
Daniel Veillard0161e632008-08-28 15:36:32 +0000218 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +0800219 if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
220 (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
221 (ctxt->nbentities <= 10000))
222 return (0);
Daniel Veillard0161e632008-08-28 15:36:32 +0000223 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000224 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
225 return (1);
226}
227
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000228/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000229 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000230 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000231 * arbitrary depth limit for the XML documents that we allow to
232 * process. This is not a limitation of the parser but a safety
233 * boundary feature. It can be disabled with the XML_PARSE_HUGE
234 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000235 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000236unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000237
Daniel Veillard0fb18932003-09-07 09:14:37 +0000238
Daniel Veillard0161e632008-08-28 15:36:32 +0000239
240#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000241#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000242#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000243#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
244
Daniel Veillard1f972e92012-08-15 10:16:37 +0800245/**
246 * XML_PARSER_CHUNK_SIZE
247 *
248 * When calling GROW that's the minimal amount of data
249 * the parser expected to have received. It is not a hard
250 * limit but an optimization when reading strings like Names
251 * It is not strictly needed as long as inputs available characters
252 * are followed by 0, which should be provided by the I/O level
253 */
254#define XML_PARSER_CHUNK_SIZE 100
255
Owen Taylor3473f882001-02-23 17:55:21 +0000256/*
Owen Taylor3473f882001-02-23 17:55:21 +0000257 * List of XML prefixed PI allowed by W3C specs
258 */
259
Daniel Veillardb44025c2001-10-11 22:55:55 +0000260static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000261 "xml-stylesheet",
Daniel Veillard4c4653e2011-06-05 11:29:29 +0800262 "xml-model",
Owen Taylor3473f882001-02-23 17:55:21 +0000263 NULL
264};
265
Daniel Veillarda07050d2003-10-19 14:46:32 +0000266
Owen Taylor3473f882001-02-23 17:55:21 +0000267/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200268static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
269 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000270
Daniel Veillard7d515752003-09-26 19:12:37 +0000271static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000272xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
273 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000274 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000275 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000276
Daniel Veillard37334572008-07-31 08:20:02 +0000277static int
278xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
279 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000280#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000281static void
282xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
283 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000284#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000285
Daniel Veillard7d515752003-09-26 19:12:37 +0000286static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000287xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
288 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000289
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000290static int
291xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
292
Daniel Veillarde57ec792003-09-10 10:50:59 +0000293/************************************************************************
294 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800295 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 * *
297 ************************************************************************/
298
299/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 * xmlErrAttributeDup:
301 * @ctxt: an XML parser context
302 * @prefix: the attribute prefix
303 * @localname: the attribute localname
304 *
305 * Handle a redefinition of attribute error
306 */
307static void
308xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
309 const xmlChar * localname)
310{
Daniel Veillard157fee02003-10-31 10:36:03 +0000311 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
312 (ctxt->instate == XML_PARSER_EOF))
313 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000314 if (ctxt != NULL)
315 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200316
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000317 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000318 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200319 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 (const char *) localname, NULL, NULL, 0, 0,
321 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000322 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000323 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200324 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 (const char *) prefix, (const char *) localname,
326 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
327 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000328 if (ctxt != NULL) {
329 ctxt->wellFormed = 0;
330 if (ctxt->recovery == 0)
331 ctxt->disableSAX = 1;
332 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333}
334
335/**
336 * xmlFatalErr:
337 * @ctxt: an XML parser context
338 * @error: the error number
339 * @extra: extra information string
340 *
341 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
342 */
343static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345{
346 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348
Daniel Veillard157fee02003-10-31 10:36:03 +0000349 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
350 (ctxt->instate == XML_PARSER_EOF))
351 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 switch (error) {
353 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800354 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000356 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800357 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000359 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800360 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000362 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 errmsg = "internal error";
364 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000365 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800366 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800369 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800372 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000374 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800375 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000377 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800378 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000380 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800381 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000383 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800384 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000386 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800387 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000389 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800390 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000392 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800393 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000395 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800396 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000398 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800399 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000401 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800402 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000403 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000404 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800405 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800408 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800411 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800414 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800417 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800420 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800423 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800426 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "Fragment not allowed";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800441 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800444 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800447 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800450 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800453 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
468 case XML_ERR_CONDSEC_INVALID_KEYWORD:
469 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800470 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000471 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000472 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800473 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000474 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000475 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800476 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000477 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000478 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800479 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000480 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000481 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800482 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000484 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800485 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000487 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000489 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000490 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800491 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000492 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000493 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800494 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000495 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000496 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000499 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800503 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000504 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000507 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000508 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800509 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000510 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000511 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800512 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000513 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000514 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800515 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000516 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000517 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800518 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000519 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000520 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800521 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000522 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800523 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800524 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800525 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000526#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000527 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800528 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000529 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000530#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800532 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000533 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800534 if (info == NULL)
535 snprintf(errstr, 128, "%s\n", errmsg);
536 else
537 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000538 if (ctxt != NULL)
539 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000540 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800541 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000542 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000543 if (ctxt != NULL) {
544 ctxt->wellFormed = 0;
545 if (ctxt->recovery == 0)
546 ctxt->disableSAX = 1;
547 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000548}
549
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000550/**
551 * xmlFatalErrMsg:
552 * @ctxt: an XML parser context
553 * @error: the error number
554 * @msg: the error message
555 *
556 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
557 */
558static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000559xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000561{
Daniel Veillard157fee02003-10-31 10:36:03 +0000562 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
563 (ctxt->instate == XML_PARSER_EOF))
564 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000565 if (ctxt != NULL)
566 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000567 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200568 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000569 if (ctxt != NULL) {
570 ctxt->wellFormed = 0;
571 if (ctxt->recovery == 0)
572 ctxt->disableSAX = 1;
573 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000574}
575
576/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000577 * xmlWarningMsg:
578 * @ctxt: an XML parser context
579 * @error: the error number
580 * @msg: the error message
581 * @str1: extra data
582 * @str2: extra data
583 *
584 * Handle a warning.
585 */
586static void
587xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
588 const char *msg, const xmlChar *str1, const xmlChar *str2)
589{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000590 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000591
Daniel Veillard157fee02003-10-31 10:36:03 +0000592 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
593 (ctxt->instate == XML_PARSER_EOF))
594 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000595 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
596 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000597 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200598 if (ctxt != NULL) {
599 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000600 (ctxt->sax) ? ctxt->sax->warning : NULL,
601 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000602 ctxt, NULL, XML_FROM_PARSER, error,
603 XML_ERR_WARNING, NULL, 0,
604 (const char *) str1, (const char *) str2, NULL, 0, 0,
605 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200606 } else {
607 __xmlRaiseError(schannel, NULL, NULL,
608 ctxt, NULL, XML_FROM_PARSER, error,
609 XML_ERR_WARNING, NULL, 0,
610 (const char *) str1, (const char *) str2, NULL, 0, 0,
611 msg, (const char *) str1, (const char *) str2);
612 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613}
614
615/**
616 * xmlValidityError:
617 * @ctxt: an XML parser context
618 * @error: the error number
619 * @msg: the error message
620 * @str1: extra data
621 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000622 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000623 */
624static void
625xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000626 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000627{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000628 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000629
630 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
631 (ctxt->instate == XML_PARSER_EOF))
632 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000633 if (ctxt != NULL) {
634 ctxt->errNo = error;
635 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
636 schannel = ctxt->sax->serror;
637 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200638 if (ctxt != NULL) {
639 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000640 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000641 ctxt, NULL, XML_FROM_DTD, error,
642 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000643 (const char *) str2, NULL, 0, 0,
644 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200646 } else {
647 __xmlRaiseError(schannel, NULL, NULL,
648 ctxt, NULL, XML_FROM_DTD, error,
649 XML_ERR_ERROR, NULL, 0, (const char *) str1,
650 (const char *) str2, NULL, 0, 0,
651 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000652 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000653}
654
655/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000656 * xmlFatalErrMsgInt:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @val: an integer value
661 *
662 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
663 */
664static void
665xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000666 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000667{
Daniel Veillard157fee02003-10-31 10:36:03 +0000668 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
669 (ctxt->instate == XML_PARSER_EOF))
670 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000671 if (ctxt != NULL)
672 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000673 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000674 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
675 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000676 if (ctxt != NULL) {
677 ctxt->wellFormed = 0;
678 if (ctxt->recovery == 0)
679 ctxt->disableSAX = 1;
680 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000681}
682
683/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000684 * xmlFatalErrMsgStrIntStr:
685 * @ctxt: an XML parser context
686 * @error: the error number
687 * @msg: the error message
688 * @str1: an string info
689 * @val: an integer value
690 * @str2: an string info
691 *
692 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
693 */
694static void
695xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800696 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000697 const xmlChar *str2)
698{
Daniel Veillard157fee02003-10-31 10:36:03 +0000699 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
700 (ctxt->instate == XML_PARSER_EOF))
701 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000702 if (ctxt != NULL)
703 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000704 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000705 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
706 NULL, 0, (const char *) str1, (const char *) str2,
707 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000708 if (ctxt != NULL) {
709 ctxt->wellFormed = 0;
710 if (ctxt->recovery == 0)
711 ctxt->disableSAX = 1;
712 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000713}
714
715/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000716 * xmlFatalErrMsgStr:
717 * @ctxt: an XML parser context
718 * @error: the error number
719 * @msg: the error message
720 * @val: a string value
721 *
722 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
723 */
724static void
725xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000726 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000727{
Daniel Veillard157fee02003-10-31 10:36:03 +0000728 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
729 (ctxt->instate == XML_PARSER_EOF))
730 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000731 if (ctxt != NULL)
732 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000733 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000734 XML_FROM_PARSER, error, XML_ERR_FATAL,
735 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
736 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000737 if (ctxt != NULL) {
738 ctxt->wellFormed = 0;
739 if (ctxt->recovery == 0)
740 ctxt->disableSAX = 1;
741 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000742}
743
744/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000745 * xmlErrMsgStr:
746 * @ctxt: an XML parser context
747 * @error: the error number
748 * @msg: the error message
749 * @val: a string value
750 *
751 * Handle a non fatal parser error
752 */
753static void
754xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
755 const char *msg, const xmlChar * val)
756{
Daniel Veillard157fee02003-10-31 10:36:03 +0000757 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
758 (ctxt->instate == XML_PARSER_EOF))
759 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000760 if (ctxt != NULL)
761 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000762 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000763 XML_FROM_PARSER, error, XML_ERR_ERROR,
764 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
765 val);
766}
767
768/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000769 * xmlNsErr:
770 * @ctxt: an XML parser context
771 * @error: the error number
772 * @msg: the message
773 * @info1: extra information string
774 * @info2: extra information string
775 *
776 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
777 */
778static void
779xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
780 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000781 const xmlChar * info1, const xmlChar * info2,
782 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000783{
Daniel Veillard157fee02003-10-31 10:36:03 +0000784 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
785 (ctxt->instate == XML_PARSER_EOF))
786 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000787 if (ctxt != NULL)
788 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000789 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000790 XML_ERR_ERROR, NULL, 0, (const char *) info1,
791 (const char *) info2, (const char *) info3, 0, 0, msg,
792 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000793 if (ctxt != NULL)
794 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000795}
796
Daniel Veillard37334572008-07-31 08:20:02 +0000797/**
798 * xmlNsWarn
799 * @ctxt: an XML parser context
800 * @error: the error number
801 * @msg: the message
802 * @info1: extra information string
803 * @info2: extra information string
804 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800805 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000806 */
807static void
808xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
809 const char *msg,
810 const xmlChar * info1, const xmlChar * info2,
811 const xmlChar * info3)
812{
813 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
814 (ctxt->instate == XML_PARSER_EOF))
815 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000816 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
817 XML_ERR_WARNING, NULL, 0, (const char *) info1,
818 (const char *) info2, (const char *) info3, 0, 0, msg,
819 info1, info2, info3);
820}
821
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000822/************************************************************************
823 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800824 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000825 * *
826 ************************************************************************/
827
828/**
829 * xmlHasFeature:
830 * @feature: the feature to be examined
831 *
832 * Examines if the library has been compiled with a given feature.
833 *
834 * Returns a non-zero value if the feature exist, otherwise zero.
835 * Returns zero (0) if the feature does not exist or an unknown
836 * unknown feature is requested, non-zero otherwise.
837 */
838int
839xmlHasFeature(xmlFeature feature)
840{
841 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000842 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000843#ifdef LIBXML_THREAD_ENABLED
844 return(1);
845#else
846 return(0);
847#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000848 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000849#ifdef LIBXML_TREE_ENABLED
850 return(1);
851#else
852 return(0);
853#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000854 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000855#ifdef LIBXML_OUTPUT_ENABLED
856 return(1);
857#else
858 return(0);
859#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000860 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000861#ifdef LIBXML_PUSH_ENABLED
862 return(1);
863#else
864 return(0);
865#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000866 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000867#ifdef LIBXML_READER_ENABLED
868 return(1);
869#else
870 return(0);
871#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000872 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000873#ifdef LIBXML_PATTERN_ENABLED
874 return(1);
875#else
876 return(0);
877#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000878 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000879#ifdef LIBXML_WRITER_ENABLED
880 return(1);
881#else
882 return(0);
883#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000884 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000885#ifdef LIBXML_SAX1_ENABLED
886 return(1);
887#else
888 return(0);
889#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000890 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000891#ifdef LIBXML_FTP_ENABLED
892 return(1);
893#else
894 return(0);
895#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000896 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000897#ifdef LIBXML_HTTP_ENABLED
898 return(1);
899#else
900 return(0);
901#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000902 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000903#ifdef LIBXML_VALID_ENABLED
904 return(1);
905#else
906 return(0);
907#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000908 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000909#ifdef LIBXML_HTML_ENABLED
910 return(1);
911#else
912 return(0);
913#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000914 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000915#ifdef LIBXML_LEGACY_ENABLED
916 return(1);
917#else
918 return(0);
919#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000920 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000921#ifdef LIBXML_C14N_ENABLED
922 return(1);
923#else
924 return(0);
925#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000926 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000927#ifdef LIBXML_CATALOG_ENABLED
928 return(1);
929#else
930 return(0);
931#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000932 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000933#ifdef LIBXML_XPATH_ENABLED
934 return(1);
935#else
936 return(0);
937#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000938 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000939#ifdef LIBXML_XPTR_ENABLED
940 return(1);
941#else
942 return(0);
943#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000944 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000945#ifdef LIBXML_XINCLUDE_ENABLED
946 return(1);
947#else
948 return(0);
949#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000950 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000951#ifdef LIBXML_ICONV_ENABLED
952 return(1);
953#else
954 return(0);
955#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000956 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000957#ifdef LIBXML_ISO8859X_ENABLED
958 return(1);
959#else
960 return(0);
961#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000962 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000963#ifdef LIBXML_UNICODE_ENABLED
964 return(1);
965#else
966 return(0);
967#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000968 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000969#ifdef LIBXML_REGEXP_ENABLED
970 return(1);
971#else
972 return(0);
973#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000974 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000975#ifdef LIBXML_AUTOMATA_ENABLED
976 return(1);
977#else
978 return(0);
979#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000980 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000981#ifdef LIBXML_EXPR_ENABLED
982 return(1);
983#else
984 return(0);
985#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000986 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000987#ifdef LIBXML_SCHEMAS_ENABLED
988 return(1);
989#else
990 return(0);
991#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000992 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000993#ifdef LIBXML_SCHEMATRON_ENABLED
994 return(1);
995#else
996 return(0);
997#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000998 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000999#ifdef LIBXML_MODULES_ENABLED
1000 return(1);
1001#else
1002 return(0);
1003#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001004 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001005#ifdef LIBXML_DEBUG_ENABLED
1006 return(1);
1007#else
1008 return(0);
1009#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001010 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001011#ifdef DEBUG_MEMORY_LOCATION
1012 return(1);
1013#else
1014 return(0);
1015#endif
Daniel Veillard602434d2005-09-12 09:20:31 +00001016 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001017#ifdef LIBXML_DEBUG_RUNTIME
1018 return(1);
1019#else
1020 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001021#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +00001022 case XML_WITH_ZLIB:
1023#ifdef LIBXML_ZLIB_ENABLED
1024 return(1);
1025#else
1026 return(0);
1027#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001028 case XML_WITH_LZMA:
1029#ifdef LIBXML_LZMA_ENABLED
1030 return(1);
1031#else
1032 return(0);
1033#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001034 case XML_WITH_ICU:
1035#ifdef LIBXML_ICU_ENABLED
1036 return(1);
1037#else
1038 return(0);
1039#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001040 default:
1041 break;
1042 }
1043 return(0);
1044}
1045
1046/************************************************************************
1047 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001048 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001049 * *
1050 ************************************************************************/
1051
1052/**
1053 * xmlDetectSAX2:
1054 * @ctxt: an XML parser context
1055 *
1056 * Do the SAX2 detection and specific intialization
1057 */
1058static void
1059xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1060 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001061#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1063 ((ctxt->sax->startElementNs != NULL) ||
1064 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001065#else
1066 ctxt->sax2 = 1;
1067#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001068
1069 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1070 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1071 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001072 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1073 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001074 xmlErrMemory(ctxt, NULL);
1075 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001076}
1077
Daniel Veillarde57ec792003-09-10 10:50:59 +00001078typedef struct _xmlDefAttrs xmlDefAttrs;
1079typedef xmlDefAttrs *xmlDefAttrsPtr;
1080struct _xmlDefAttrs {
1081 int nbAttrs; /* number of defaulted attributes on that element */
1082 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001083 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001084};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001085
1086/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001087 * xmlAttrNormalizeSpace:
1088 * @src: the source string
1089 * @dst: the target string
1090 *
1091 * Normalize the space in non CDATA attribute values:
1092 * If the attribute type is not CDATA, then the XML processor MUST further
1093 * process the normalized attribute value by discarding any leading and
1094 * trailing space (#x20) characters, and by replacing sequences of space
1095 * (#x20) characters by a single space (#x20) character.
1096 * Note that the size of dst need to be at least src, and if one doesn't need
1097 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1098 * passing src as dst is just fine.
1099 *
1100 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1101 * is needed.
1102 */
1103static xmlChar *
1104xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1105{
1106 if ((src == NULL) || (dst == NULL))
1107 return(NULL);
1108
1109 while (*src == 0x20) src++;
1110 while (*src != 0) {
1111 if (*src == 0x20) {
1112 while (*src == 0x20) src++;
1113 if (*src != 0)
1114 *dst++ = 0x20;
1115 } else {
1116 *dst++ = *src++;
1117 }
1118 }
1119 *dst = 0;
1120 if (dst == src)
1121 return(NULL);
1122 return(dst);
1123}
1124
1125/**
1126 * xmlAttrNormalizeSpace2:
1127 * @src: the source string
1128 *
1129 * Normalize the space in non CDATA attribute values, a slightly more complex
1130 * front end to avoid allocation problems when running on attribute values
1131 * coming from the input.
1132 *
1133 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1134 * is needed.
1135 */
1136static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001137xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001138{
1139 int i;
1140 int remove_head = 0;
1141 int need_realloc = 0;
1142 const xmlChar *cur;
1143
1144 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1145 return(NULL);
1146 i = *len;
1147 if (i <= 0)
1148 return(NULL);
1149
1150 cur = src;
1151 while (*cur == 0x20) {
1152 cur++;
1153 remove_head++;
1154 }
1155 while (*cur != 0) {
1156 if (*cur == 0x20) {
1157 cur++;
1158 if ((*cur == 0x20) || (*cur == 0)) {
1159 need_realloc = 1;
1160 break;
1161 }
1162 } else
1163 cur++;
1164 }
1165 if (need_realloc) {
1166 xmlChar *ret;
1167
1168 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1169 if (ret == NULL) {
1170 xmlErrMemory(ctxt, NULL);
1171 return(NULL);
1172 }
1173 xmlAttrNormalizeSpace(ret, ret);
1174 *len = (int) strlen((const char *)ret);
1175 return(ret);
1176 } else if (remove_head) {
1177 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001178 memmove(src, src + remove_head, 1 + *len);
1179 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001180 }
1181 return(NULL);
1182}
1183
1184/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 * xmlAddDefAttrs:
1186 * @ctxt: an XML parser context
1187 * @fullname: the element fullname
1188 * @fullattr: the attribute fullname
1189 * @value: the attribute value
1190 *
1191 * Add a defaulted attribute for an element
1192 */
1193static void
1194xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1195 const xmlChar *fullname,
1196 const xmlChar *fullattr,
1197 const xmlChar *value) {
1198 xmlDefAttrsPtr defaults;
1199 int len;
1200 const xmlChar *name;
1201 const xmlChar *prefix;
1202
Daniel Veillard6a31b832008-03-26 14:06:44 +00001203 /*
1204 * Allows to detect attribute redefinitions
1205 */
1206 if (ctxt->attsSpecial != NULL) {
1207 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1208 return;
1209 }
1210
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001212 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001213 if (ctxt->attsDefault == NULL)
1214 goto mem_error;
1215 }
1216
1217 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001218 * split the element name into prefix:localname , the string found
1219 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001220 */
1221 name = xmlSplitQName3(fullname, &len);
1222 if (name == NULL) {
1223 name = xmlDictLookup(ctxt->dict, fullname, -1);
1224 prefix = NULL;
1225 } else {
1226 name = xmlDictLookup(ctxt->dict, name, -1);
1227 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1228 }
1229
1230 /*
1231 * make sure there is some storage
1232 */
1233 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1234 if (defaults == NULL) {
1235 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001236 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001237 if (defaults == NULL)
1238 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001239 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001240 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001241 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1242 defaults, NULL) < 0) {
1243 xmlFree(defaults);
1244 goto mem_error;
1245 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001246 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001247 xmlDefAttrsPtr temp;
1248
1249 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001250 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001251 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001252 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001253 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001254 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001255 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1256 defaults, NULL) < 0) {
1257 xmlFree(defaults);
1258 goto mem_error;
1259 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001260 }
1261
1262 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001263 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 * are within the DTD and hen not associated to namespace names.
1265 */
1266 name = xmlSplitQName3(fullattr, &len);
1267 if (name == NULL) {
1268 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1269 prefix = NULL;
1270 } else {
1271 name = xmlDictLookup(ctxt->dict, name, -1);
1272 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1273 }
1274
Daniel Veillardae0765b2008-07-31 19:54:59 +00001275 defaults->values[5 * defaults->nbAttrs] = name;
1276 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001277 /* intern the string and precompute the end */
1278 len = xmlStrlen(value);
1279 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001280 defaults->values[5 * defaults->nbAttrs + 2] = value;
1281 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1282 if (ctxt->external)
1283 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1284 else
1285 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001286 defaults->nbAttrs++;
1287
1288 return;
1289
1290mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001291 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001292 return;
1293}
1294
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001295/**
1296 * xmlAddSpecialAttr:
1297 * @ctxt: an XML parser context
1298 * @fullname: the element fullname
1299 * @fullattr: the attribute fullname
1300 * @type: the attribute type
1301 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001302 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001303 */
1304static void
1305xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1306 const xmlChar *fullname,
1307 const xmlChar *fullattr,
1308 int type)
1309{
1310 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001311 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001312 if (ctxt->attsSpecial == NULL)
1313 goto mem_error;
1314 }
1315
Daniel Veillardac4118d2008-01-11 05:27:32 +00001316 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1317 return;
1318
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001319 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1320 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001321 return;
1322
1323mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001324 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001325 return;
1326}
1327
Daniel Veillard4432df22003-09-28 18:58:27 +00001328/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001329 * xmlCleanSpecialAttrCallback:
1330 *
1331 * Removes CDATA attributes from the special attribute table
1332 */
1333static void
1334xmlCleanSpecialAttrCallback(void *payload, void *data,
1335 const xmlChar *fullname, const xmlChar *fullattr,
1336 const xmlChar *unused ATTRIBUTE_UNUSED) {
1337 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1338
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001339 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001340 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1341 }
1342}
1343
1344/**
1345 * xmlCleanSpecialAttr:
1346 * @ctxt: an XML parser context
1347 *
1348 * Trim the list of attributes defined to remove all those of type
1349 * CDATA as they are not special. This call should be done when finishing
1350 * to parse the DTD and before starting to parse the document root.
1351 */
1352static void
1353xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1354{
1355 if (ctxt->attsSpecial == NULL)
1356 return;
1357
1358 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1359
1360 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1361 xmlHashFree(ctxt->attsSpecial, NULL);
1362 ctxt->attsSpecial = NULL;
1363 }
1364 return;
1365}
1366
1367/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001368 * xmlCheckLanguageID:
1369 * @lang: pointer to the string value
1370 *
1371 * Checks that the value conforms to the LanguageID production:
1372 *
1373 * NOTE: this is somewhat deprecated, those productions were removed from
1374 * the XML Second edition.
1375 *
1376 * [33] LanguageID ::= Langcode ('-' Subcode)*
1377 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1378 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1379 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1380 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1381 * [38] Subcode ::= ([a-z] | [A-Z])+
1382 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001383 * The current REC references the successors of RFC 1766, currently RFC 5646
1384 *
1385 * http://www.rfc-editor.org/rfc/rfc5646.txt
1386 * langtag = language
1387 * ["-" script]
1388 * ["-" region]
1389 * *("-" variant)
1390 * *("-" extension)
1391 * ["-" privateuse]
1392 * language = 2*3ALPHA ; shortest ISO 639 code
1393 * ["-" extlang] ; sometimes followed by
1394 * ; extended language subtags
1395 * / 4ALPHA ; or reserved for future use
1396 * / 5*8ALPHA ; or registered language subtag
1397 *
1398 * extlang = 3ALPHA ; selected ISO 639 codes
1399 * *2("-" 3ALPHA) ; permanently reserved
1400 *
1401 * script = 4ALPHA ; ISO 15924 code
1402 *
1403 * region = 2ALPHA ; ISO 3166-1 code
1404 * / 3DIGIT ; UN M.49 code
1405 *
1406 * variant = 5*8alphanum ; registered variants
1407 * / (DIGIT 3alphanum)
1408 *
1409 * extension = singleton 1*("-" (2*8alphanum))
1410 *
1411 * ; Single alphanumerics
1412 * ; "x" reserved for private use
1413 * singleton = DIGIT ; 0 - 9
1414 * / %x41-57 ; A - W
1415 * / %x59-5A ; Y - Z
1416 * / %x61-77 ; a - w
1417 * / %x79-7A ; y - z
1418 *
1419 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1420 * The parser below doesn't try to cope with extension or privateuse
1421 * that could be added but that's not interoperable anyway
1422 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001423 * Returns 1 if correct 0 otherwise
1424 **/
1425int
1426xmlCheckLanguageID(const xmlChar * lang)
1427{
Daniel Veillard60587d62010-11-04 15:16:27 +01001428 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001429
1430 if (cur == NULL)
1431 return (0);
1432 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001433 ((cur[0] == 'I') && (cur[1] == '-')) ||
1434 ((cur[0] == 'x') && (cur[1] == '-')) ||
1435 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001436 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001437 * Still allow IANA code and user code which were coming
1438 * from the previous version of the XML-1.0 specification
1439 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001440 */
1441 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001442 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001443 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1444 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001445 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001446 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001447 nxt = cur;
1448 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1449 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1450 nxt++;
1451 if (nxt - cur >= 4) {
1452 /*
1453 * Reserved
1454 */
1455 if ((nxt - cur > 8) || (nxt[0] != 0))
1456 return(0);
1457 return(1);
1458 }
1459 if (nxt - cur < 2)
1460 return(0);
1461 /* we got an ISO 639 code */
1462 if (nxt[0] == 0)
1463 return(1);
1464 if (nxt[0] != '-')
1465 return(0);
1466
1467 nxt++;
1468 cur = nxt;
1469 /* now we can have extlang or script or region or variant */
1470 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1471 goto region_m49;
1472
1473 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1474 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1475 nxt++;
1476 if (nxt - cur == 4)
1477 goto script;
1478 if (nxt - cur == 2)
1479 goto region;
1480 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1481 goto variant;
1482 if (nxt - cur != 3)
1483 return(0);
1484 /* we parsed an extlang */
1485 if (nxt[0] == 0)
1486 return(1);
1487 if (nxt[0] != '-')
1488 return(0);
1489
1490 nxt++;
1491 cur = nxt;
1492 /* now we can have script or region or variant */
1493 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1494 goto region_m49;
1495
1496 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498 nxt++;
1499 if (nxt - cur == 2)
1500 goto region;
1501 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1502 goto variant;
1503 if (nxt - cur != 4)
1504 return(0);
1505 /* we parsed a script */
1506script:
1507 if (nxt[0] == 0)
1508 return(1);
1509 if (nxt[0] != '-')
1510 return(0);
1511
1512 nxt++;
1513 cur = nxt;
1514 /* now we can have region or variant */
1515 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1516 goto region_m49;
1517
1518 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1519 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1520 nxt++;
1521
1522 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1523 goto variant;
1524 if (nxt - cur != 2)
1525 return(0);
1526 /* we parsed a region */
1527region:
1528 if (nxt[0] == 0)
1529 return(1);
1530 if (nxt[0] != '-')
1531 return(0);
1532
1533 nxt++;
1534 cur = nxt;
1535 /* now we can just have a variant */
1536 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1537 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1538 nxt++;
1539
1540 if ((nxt - cur < 5) || (nxt - cur > 8))
1541 return(0);
1542
1543 /* we parsed a variant */
1544variant:
1545 if (nxt[0] == 0)
1546 return(1);
1547 if (nxt[0] != '-')
1548 return(0);
1549 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001550 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001551
1552region_m49:
1553 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1554 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1555 nxt += 3;
1556 goto region;
1557 }
1558 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001559}
1560
/************************************************************************
 *									*
 *		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/
1566
Daniel Veillard8ed10722009-08-20 19:17:36 +02001567static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1568 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001569
Daniel Veillard0fb18932003-09-07 09:14:37 +00001570#ifdef SAX2
1571/**
1572 * nsPush:
1573 * @ctxt: an XML parser context
1574 * @prefix: the namespace prefix or NULL
1575 * @URL: the namespace name
1576 *
1577 * Pushes a new parser namespace on top of the ns stack
1578 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001579 * Returns -1 in case of error, -2 if the namespace should be discarded
1580 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 */
1582static int
1583nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1584{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001585 if (ctxt->options & XML_PARSE_NSCLEAN) {
1586 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001587 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001588 if (ctxt->nsTab[i] == prefix) {
1589 /* in scope */
1590 if (ctxt->nsTab[i + 1] == URL)
1591 return(-2);
1592 /* out of scope keep it */
1593 break;
1594 }
1595 }
1596 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001597 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1598 ctxt->nsMax = 10;
1599 ctxt->nsNr = 0;
1600 ctxt->nsTab = (const xmlChar **)
1601 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1602 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001603 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001604 ctxt->nsMax = 0;
1605 return (-1);
1606 }
1607 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001608 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001609 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001610 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1611 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1612 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001613 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001614 ctxt->nsMax /= 2;
1615 return (-1);
1616 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001617 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001618 }
1619 ctxt->nsTab[ctxt->nsNr++] = prefix;
1620 ctxt->nsTab[ctxt->nsNr++] = URL;
1621 return (ctxt->nsNr);
1622}
1623/**
1624 * nsPop:
1625 * @ctxt: an XML parser context
1626 * @nr: the number to pop
1627 *
1628 * Pops the top @nr parser prefix/namespace from the ns stack
1629 *
1630 * Returns the number of namespaces removed
1631 */
1632static int
1633nsPop(xmlParserCtxtPtr ctxt, int nr)
1634{
1635 int i;
1636
1637 if (ctxt->nsTab == NULL) return(0);
1638 if (ctxt->nsNr < nr) {
1639 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1640 nr = ctxt->nsNr;
1641 }
1642 if (ctxt->nsNr <= 0)
1643 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001644
Daniel Veillard0fb18932003-09-07 09:14:37 +00001645 for (i = 0;i < nr;i++) {
1646 ctxt->nsNr--;
1647 ctxt->nsTab[ctxt->nsNr] = NULL;
1648 }
1649 return(nr);
1650}
1651#endif
1652
1653static int
1654xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1655 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657 int maxatts;
1658
1659 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001660 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001661 atts = (const xmlChar **)
1662 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001663 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001664 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001665 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1666 if (attallocs == NULL) goto mem_error;
1667 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001668 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001669 } else if (nr + 5 > ctxt->maxatts) {
1670 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001671 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1672 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001673 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001674 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001675 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1676 (maxatts / 5) * sizeof(int));
1677 if (attallocs == NULL) goto mem_error;
1678 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001679 ctxt->maxatts = maxatts;
1680 }
1681 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001682mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001683 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001684 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001685}
1686
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001687/**
1688 * inputPush:
1689 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001690 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691 *
1692 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001693 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001694 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001695 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001696int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1698{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001699 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001700 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001701 if (ctxt->inputNr >= ctxt->inputMax) {
1702 ctxt->inputMax *= 2;
1703 ctxt->inputTab =
1704 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1705 ctxt->inputMax *
1706 sizeof(ctxt->inputTab[0]));
1707 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001708 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001709 xmlFreeInputStream(value);
1710 ctxt->inputMax /= 2;
1711 value = NULL;
1712 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001713 }
1714 }
1715 ctxt->inputTab[ctxt->inputNr] = value;
1716 ctxt->input = value;
1717 return (ctxt->inputNr++);
1718}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001719/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001720 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 * @ctxt: an XML parser context
1722 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001723 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001724 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001725 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001726 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001727xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001728inputPop(xmlParserCtxtPtr ctxt)
1729{
1730 xmlParserInputPtr ret;
1731
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001732 if (ctxt == NULL)
1733 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001734 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001735 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001736 ctxt->inputNr--;
1737 if (ctxt->inputNr > 0)
1738 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1739 else
1740 ctxt->input = NULL;
1741 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001742 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001743 return (ret);
1744}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001745/**
1746 * nodePush:
1747 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001748 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001749 *
1750 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001751 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001752 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001753 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001754int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001755nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1756{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001757 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001758 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001759 xmlNodePtr *tmp;
1760
1761 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1762 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001763 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001764 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001765 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001766 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001767 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001768 ctxt->nodeTab = tmp;
1769 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001771 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1772 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001773 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001774 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001775 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +08001776 xmlHaltParser(ctxt);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001777 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001778 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 ctxt->nodeTab[ctxt->nodeNr] = value;
1780 ctxt->node = value;
1781 return (ctxt->nodeNr++);
1782}
Daniel Veillard8915c152008-08-26 13:05:34 +00001783
Daniel Veillard1c732d22002-11-30 11:22:59 +00001784/**
1785 * nodePop:
1786 * @ctxt: an XML parser context
1787 *
1788 * Pops the top element node from the node stack
1789 *
1790 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001791 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001792xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001793nodePop(xmlParserCtxtPtr ctxt)
1794{
1795 xmlNodePtr ret;
1796
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001797 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001798 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001799 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001800 ctxt->nodeNr--;
1801 if (ctxt->nodeNr > 0)
1802 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1803 else
1804 ctxt->node = NULL;
1805 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001806 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001807 return (ret);
1808}
Daniel Veillarda2351322004-06-27 12:08:10 +00001809
1810#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001811/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001812 * nameNsPush:
1813 * @ctxt: an XML parser context
1814 * @value: the element name
1815 * @prefix: the element prefix
1816 * @URI: the element namespace name
1817 *
1818 * Pushes a new element name/prefix/URL on top of the name stack
1819 *
1820 * Returns -1 in case of error, the index in the stack otherwise
1821 */
1822static int
1823nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1824 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1825{
1826 if (ctxt->nameNr >= ctxt->nameMax) {
1827 const xmlChar * *tmp;
1828 void **tmp2;
1829 ctxt->nameMax *= 2;
1830 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1831 ctxt->nameMax *
1832 sizeof(ctxt->nameTab[0]));
1833 if (tmp == NULL) {
1834 ctxt->nameMax /= 2;
1835 goto mem_error;
1836 }
1837 ctxt->nameTab = tmp;
1838 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1839 ctxt->nameMax * 3 *
1840 sizeof(ctxt->pushTab[0]));
1841 if (tmp2 == NULL) {
1842 ctxt->nameMax /= 2;
1843 goto mem_error;
1844 }
1845 ctxt->pushTab = tmp2;
1846 }
1847 ctxt->nameTab[ctxt->nameNr] = value;
1848 ctxt->name = value;
1849 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1850 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001851 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001852 return (ctxt->nameNr++);
1853mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001854 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001855 return (-1);
1856}
1857/**
1858 * nameNsPop:
1859 * @ctxt: an XML parser context
1860 *
1861 * Pops the top element/prefix/URI name from the name stack
1862 *
1863 * Returns the name just removed
1864 */
1865static const xmlChar *
1866nameNsPop(xmlParserCtxtPtr ctxt)
1867{
1868 const xmlChar *ret;
1869
1870 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001871 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001872 ctxt->nameNr--;
1873 if (ctxt->nameNr > 0)
1874 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1875 else
1876 ctxt->name = NULL;
1877 ret = ctxt->nameTab[ctxt->nameNr];
1878 ctxt->nameTab[ctxt->nameNr] = NULL;
1879 return (ret);
1880}
Daniel Veillarda2351322004-06-27 12:08:10 +00001881#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001882
1883/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001884 * namePush:
1885 * @ctxt: an XML parser context
1886 * @value: the element name
1887 *
1888 * Pushes a new element name on top of the name stack
1889 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001890 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001891 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001892int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001893namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001894{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001895 if (ctxt == NULL) return (-1);
1896
Daniel Veillard1c732d22002-11-30 11:22:59 +00001897 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001898 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001899 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001900 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001901 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001902 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001903 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001904 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001905 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001906 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001907 }
1908 ctxt->nameTab[ctxt->nameNr] = value;
1909 ctxt->name = value;
1910 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001911mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001912 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001913 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001914}
1915/**
1916 * namePop:
1917 * @ctxt: an XML parser context
1918 *
1919 * Pops the top element name from the name stack
1920 *
1921 * Returns the name just removed
1922 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001923const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001924namePop(xmlParserCtxtPtr ctxt)
1925{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001926 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001927
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001928 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1929 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001930 ctxt->nameNr--;
1931 if (ctxt->nameNr > 0)
1932 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1933 else
1934 ctxt->name = NULL;
1935 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001936 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001937 return (ret);
1938}
Owen Taylor3473f882001-02-23 17:55:21 +00001939
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001940static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001941 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001942 int *tmp;
1943
Owen Taylor3473f882001-02-23 17:55:21 +00001944 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001945 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1946 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1947 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001948 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001949 ctxt->spaceMax /=2;
1950 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001951 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001952 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001953 }
1954 ctxt->spaceTab[ctxt->spaceNr] = val;
1955 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1956 return(ctxt->spaceNr++);
1957}
1958
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001959static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001960 int ret;
1961 if (ctxt->spaceNr <= 0) return(0);
1962 ctxt->spaceNr--;
1963 if (ctxt->spaceNr > 0)
1964 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1965 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001966 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001967 ret = ctxt->spaceTab[ctxt->spaceNr];
1968 ctxt->spaceTab[ctxt->spaceNr] = -1;
1969 return(ret);
1970}
1971
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2006
/*
 * Direct access to the current input of the parser context. All of these
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as unsigned
 * char, are c1 ... cn. Bytes are tested left to right and the test stops at
 * the first mismatch. The s argument is parenthesized before the cast so
 * that any pointer expression (for instance a conditional expression) is
 * converted as a whole; note that s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && \
    ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && \
    ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
2029
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character count and the column only; as the line number is not touched
 * it must not be used across newlines. Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and an input which
 * is exhausted and cannot be grown is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that line
 * and column stay accurate when newlines are skipped. val is parenthesized
 * in the loop test so that an expression of lower precedence than '<'
 * (for instance a conditional expression) is still used as the bound.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2052
Daniel Veillarda880b122003-04-21 21:36:41 +00002053#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002054 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2055 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002056 xmlSHRINK (ctxt);
2057
2058static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2059 xmlParserInputShrink(ctxt->input);
2060 if ((*ctxt->input->cur == 0) &&
2061 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2062 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002063 }
Owen Taylor3473f882001-02-23 17:55:21 +00002064
Daniel Veillarda880b122003-04-21 21:36:41 +00002065#define GROW if ((ctxt->progressive == 0) && \
2066 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002067 xmlGROW (ctxt);
2068
2069static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002070 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2071 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2072
2073 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2074 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002075 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002076 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2077 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002078 xmlHaltParser(ctxt);
2079 return;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002080 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002081 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard35bcb1d2015-11-20 15:04:09 +08002082 if ((ctxt->input->cur > ctxt->input->end) ||
2083 (ctxt->input->cur < ctxt->input->base)) {
2084 xmlHaltParser(ctxt);
2085 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2086 return;
2087 }
Daniel Veillard59df7832010-02-02 10:24:01 +01002088 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002089 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2090 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002091}
Owen Taylor3473f882001-02-23 17:55:21 +00002092
/* Skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, counting it as one column, and
 * try to read more input when the new current byte is 0. The line number
 * is not updated.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long, keeping
 * line and column up to date; a '%' at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: it receives the length of the character read */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; a character of length l == 1 is stored as a single xmlChar, anything
 * else goes through xmlCopyCharMultiByte(). The arguments are
 * parenthesized so that compound expressions select the intended branch
 * and buffer. The expansion is an unbraced if/else statement and i must
 * be an lvalue.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[i++] = (xmlChar) (v);				\
    else i += xmlCopyCharMultiByte(&(b)[i],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002119
2120/**
2121 * xmlSkipBlankChars:
2122 * @ctxt: the XML parser context
2123 *
2124 * skip all blanks character found at that point in the input streams.
2125 * It pops up finished entities in the process if allowable at that point.
2126 *
2127 * Returns the number of space chars skipped
2128 */
2129
2130int
2131xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002132 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002133
2134 /*
2135 * It's Okay to use CUR/NEXT here since all the blanks are on
2136 * the ASCII range.
2137 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002138 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2139 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002140 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002141 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002142 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002143 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002144 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002145 if (*cur == '\n') {
2146 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002147 } else {
2148 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002149 }
2150 cur++;
2151 res++;
2152 if (*cur == 0) {
2153 ctxt->input->cur = cur;
2154 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2155 cur = ctxt->input->cur;
2156 }
2157 }
2158 ctxt->input->cur = cur;
2159 } else {
2160 int cur;
2161 do {
2162 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002163 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002164 NEXT;
2165 cur = CUR;
2166 res++;
2167 }
2168 while ((cur == 0) && (ctxt->inputNr > 1) &&
2169 (ctxt->instate != XML_PARSER_COMMENT)) {
2170 xmlPopInput(ctxt);
2171 cur = CUR;
2172 }
2173 /*
2174 * Need to handle support of entities branching here
2175 */
2176 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2177 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2178 }
Owen Taylor3473f882001-02-23 17:55:21 +00002179 return(res);
2180}
2181
/************************************************************************
 *									*
 *		Commodity functions to handle entities			*
 *									*
 ************************************************************************/
2187
2188/**
2189 * xmlPopInput:
2190 * @ctxt: an XML parser context
2191 *
2192 * xmlPopInput: the current input pointed by ctxt->input came to an end
2193 * pop it and return the next char.
2194 *
2195 * Returns the current xmlChar in the parser context
2196 */
2197xmlChar
2198xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002199 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002200 if (xmlParserDebugEntities)
2201 xmlGenericError(xmlGenericErrorContext,
2202 "Popping input %d\n", ctxt->inputNr);
2203 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002204 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002205 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2206 return(xmlPopInput(ctxt));
2207 return(CUR);
2208}
2209
2210/**
2211 * xmlPushInput:
2212 * @ctxt: an XML parser context
2213 * @input: an XML parser input fragment (entity, XML fragment ...).
2214 *
2215 * xmlPushInput: switch to a new input stream which is stacked on top
2216 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002217 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002218 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002219int
Owen Taylor3473f882001-02-23 17:55:21 +00002220xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002221 int ret;
2222 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002223
2224 if (xmlParserDebugEntities) {
2225 if ((ctxt->input != NULL) && (ctxt->input->filename))
2226 xmlGenericError(xmlGenericErrorContext,
2227 "%s(%d): ", ctxt->input->filename,
2228 ctxt->input->line);
2229 xmlGenericError(xmlGenericErrorContext,
2230 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2231 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002232 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002233 if (ctxt->instate == XML_PARSER_EOF)
2234 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002235 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002236 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002237}
2238
2239/**
2240 * xmlParseCharRef:
2241 * @ctxt: an XML parser context
2242 *
2243 * parse Reference declarations
2244 *
2245 * [66] CharRef ::= '&#' [0-9]+ ';' |
2246 * '&#x' [0-9a-fA-F]+ ';'
2247 *
2248 * [ WFC: Legal Character ]
2249 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002250 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002251 *
2252 * Returns the value parsed (as an int), 0 in case of error
2253 */
2254int
2255xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002256 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002257 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002258 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002259
Owen Taylor3473f882001-02-23 17:55:21 +00002260 /*
2261 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2262 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002263 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002264 (NXT(2) == 'x')) {
2265 SKIP(3);
2266 GROW;
2267 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002268 if (count++ > 20) {
2269 count = 0;
2270 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002271 if (ctxt->instate == XML_PARSER_EOF)
2272 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002273 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002274 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002275 val = val * 16 + (CUR - '0');
2276 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2277 val = val * 16 + (CUR - 'a') + 10;
2278 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2279 val = val * 16 + (CUR - 'A') + 10;
2280 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002281 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002282 val = 0;
2283 break;
2284 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002285 if (val > 0x10FFFF)
2286 outofrange = val;
2287
Owen Taylor3473f882001-02-23 17:55:21 +00002288 NEXT;
2289 count++;
2290 }
2291 if (RAW == ';') {
2292 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002293 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002294 ctxt->nbChars ++;
2295 ctxt->input->cur++;
2296 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002297 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002298 SKIP(2);
2299 GROW;
2300 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002301 if (count++ > 20) {
2302 count = 0;
2303 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002304 if (ctxt->instate == XML_PARSER_EOF)
2305 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002306 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002307 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002308 val = val * 10 + (CUR - '0');
2309 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002310 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002311 val = 0;
2312 break;
2313 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002314 if (val > 0x10FFFF)
2315 outofrange = val;
2316
Owen Taylor3473f882001-02-23 17:55:21 +00002317 NEXT;
2318 count++;
2319 }
2320 if (RAW == ';') {
2321 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002322 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002323 ctxt->nbChars ++;
2324 ctxt->input->cur++;
2325 }
2326 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002327 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002328 }
2329
2330 /*
2331 * [ WFC: Legal Character ]
2332 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002333 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002334 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002335 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002336 return(val);
2337 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002338 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2339 "xmlParseCharRef: invalid xmlChar value %d\n",
2340 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 }
2342 return(0);
2343}
2344
2345/**
2346 * xmlParseStringCharRef:
2347 * @ctxt: an XML parser context
2348 * @str: a pointer to an index in the string
2349 *
2350 * parse Reference declarations, variant parsing from a string rather
2351 * than an an input flow.
2352 *
2353 * [66] CharRef ::= '&#' [0-9]+ ';' |
2354 * '&#x' [0-9a-fA-F]+ ';'
2355 *
2356 * [ WFC: Legal Character ]
2357 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002358 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002359 *
2360 * Returns the value parsed (as an int), 0 in case of error, str will be
2361 * updated to the current value of the index
2362 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002363static int
Owen Taylor3473f882001-02-23 17:55:21 +00002364xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2365 const xmlChar *ptr;
2366 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002367 unsigned int val = 0;
2368 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002369
2370 if ((str == NULL) || (*str == NULL)) return(0);
2371 ptr = *str;
2372 cur = *ptr;
2373 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2374 ptr += 3;
2375 cur = *ptr;
2376 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002377 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002378 val = val * 16 + (cur - '0');
2379 else if ((cur >= 'a') && (cur <= 'f'))
2380 val = val * 16 + (cur - 'a') + 10;
2381 else if ((cur >= 'A') && (cur <= 'F'))
2382 val = val * 16 + (cur - 'A') + 10;
2383 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002384 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002385 val = 0;
2386 break;
2387 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002388 if (val > 0x10FFFF)
2389 outofrange = val;
2390
Owen Taylor3473f882001-02-23 17:55:21 +00002391 ptr++;
2392 cur = *ptr;
2393 }
2394 if (cur == ';')
2395 ptr++;
2396 } else if ((cur == '&') && (ptr[1] == '#')){
2397 ptr += 2;
2398 cur = *ptr;
2399 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002400 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002401 val = val * 10 + (cur - '0');
2402 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002403 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002404 val = 0;
2405 break;
2406 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002407 if (val > 0x10FFFF)
2408 outofrange = val;
2409
Owen Taylor3473f882001-02-23 17:55:21 +00002410 ptr++;
2411 cur = *ptr;
2412 }
2413 if (cur == ';')
2414 ptr++;
2415 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002416 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002417 return(0);
2418 }
2419 *str = ptr;
2420
2421 /*
2422 * [ WFC: Legal Character ]
2423 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002424 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002425 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002426 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002427 return(val);
2428 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002429 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2430 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2431 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 }
2433 return(0);
2434}
2435
2436/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002437 * xmlNewBlanksWrapperInputStream:
2438 * @ctxt: an XML parser context
2439 * @entity: an Entity pointer
2440 *
2441 * Create a new input stream for wrapping
2442 * blanks around a PEReference
2443 *
2444 * Returns the new input stream or NULL
2445 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002446
Daniel Veillardf5582f12002-06-11 10:08:16 +00002447static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002448
Daniel Veillardf4862f02002-09-10 11:13:43 +00002449static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002450xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2451 xmlParserInputPtr input;
2452 xmlChar *buffer;
2453 size_t length;
2454 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002455 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2456 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002457 return(NULL);
2458 }
2459 if (xmlParserDebugEntities)
2460 xmlGenericError(xmlGenericErrorContext,
2461 "new blanks wrapper for entity: %s\n", entity->name);
2462 input = xmlNewInputStream(ctxt);
2463 if (input == NULL) {
2464 return(NULL);
2465 }
2466 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002467 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002468 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002469 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002470 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002471 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002472 }
2473 buffer [0] = ' ';
2474 buffer [1] = '%';
2475 buffer [length-3] = ';';
2476 buffer [length-2] = ' ';
2477 buffer [length-1] = 0;
2478 memcpy(buffer + 2, entity->name, length - 5);
2479 input->free = deallocblankswrapper;
2480 input->base = buffer;
2481 input->cur = buffer;
2482 input->length = length;
2483 input->end = &buffer[length];
2484 return(input);
2485}
2486
2487/**
Owen Taylor3473f882001-02-23 17:55:21 +00002488 * xmlParserHandlePEReference:
2489 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002490 *
Owen Taylor3473f882001-02-23 17:55:21 +00002491 * [69] PEReference ::= '%' Name ';'
2492 *
2493 * [ WFC: No Recursion ]
2494 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002495 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002496 *
2497 * [ WFC: Entity Declared ]
2498 * In a document without any DTD, a document with only an internal DTD
2499 * subset which contains no parameter entity references, or a document
2500 * with "standalone='yes'", ... ... The declaration of a parameter
2501 * entity must precede any reference to it...
2502 *
2503 * [ VC: Entity Declared ]
2504 * In a document with an external subset or external parameter entities
2505 * with "standalone='no'", ... ... The declaration of a parameter entity
2506 * must precede any reference to it...
2507 *
2508 * [ WFC: In DTD ]
2509 * Parameter-entity references may only appear in the DTD.
2510 * NOTE: misleading but this is handled.
2511 *
2512 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002513 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002514 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002515 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002516 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002517 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002518 */
2519void
2520xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002521 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002522 xmlEntityPtr entity = NULL;
2523 xmlParserInputPtr input;
2524
Owen Taylor3473f882001-02-23 17:55:21 +00002525 if (RAW != '%') return;
2526 switch(ctxt->instate) {
2527 case XML_PARSER_CDATA_SECTION:
2528 return;
2529 case XML_PARSER_COMMENT:
2530 return;
2531 case XML_PARSER_START_TAG:
2532 return;
2533 case XML_PARSER_END_TAG:
2534 return;
2535 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002536 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002537 return;
2538 case XML_PARSER_PROLOG:
2539 case XML_PARSER_START:
2540 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002541 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002542 return;
2543 case XML_PARSER_ENTITY_DECL:
2544 case XML_PARSER_CONTENT:
2545 case XML_PARSER_ATTRIBUTE_VALUE:
2546 case XML_PARSER_PI:
2547 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002548 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002549 /* we just ignore it there */
2550 return;
2551 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002552 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002553 return;
2554 case XML_PARSER_ENTITY_VALUE:
2555 /*
2556 * NOTE: in the case of entity values, we don't do the
2557 * substitution here since we need the literal
2558 * entity value to be able to save the internal
2559 * subset of the document.
2560 * This will be handled by xmlStringDecodeEntities
2561 */
2562 return;
2563 case XML_PARSER_DTD:
2564 /*
2565 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2566 * In the internal DTD subset, parameter-entity references
2567 * can occur only where markup declarations can occur, not
2568 * within markup declarations.
2569 * In that case this is handled in xmlParseMarkupDecl
2570 */
2571 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2572 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002573 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002574 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002575 break;
2576 case XML_PARSER_IGNORE:
2577 return;
2578 }
2579
2580 NEXT;
2581 name = xmlParseName(ctxt);
2582 if (xmlParserDebugEntities)
2583 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002584 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002585 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002586 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002587 } else {
2588 if (RAW == ';') {
2589 NEXT;
2590 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2591 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002592 if (ctxt->instate == XML_PARSER_EOF)
2593 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002594 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002595
Owen Taylor3473f882001-02-23 17:55:21 +00002596 /*
2597 * [ WFC: Entity Declared ]
2598 * In a document without any DTD, a document with only an
2599 * internal DTD subset which contains no parameter entity
2600 * references, or a document with "standalone='yes'", ...
2601 * ... The declaration of a parameter entity must precede
2602 * any reference to it...
2603 */
2604 if ((ctxt->standalone == 1) ||
2605 ((ctxt->hasExternalSubset == 0) &&
2606 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002607 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002608 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002609 } else {
2610 /*
2611 * [ VC: Entity Declared ]
2612 * In a document with an external subset or external
2613 * parameter entities with "standalone='no'", ...
2614 * ... The declaration of a parameter entity must precede
2615 * any reference to it...
2616 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002617 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2618 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2619 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002620 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002621 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002622 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2623 "PEReference: %%%s; not found\n",
2624 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002625 ctxt->valid = 0;
2626 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002627 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002628 } else if (ctxt->input->free != deallocblankswrapper) {
2629 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002630 if (xmlPushInput(ctxt, input) < 0)
2631 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002632 } else {
2633 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2634 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002635 xmlChar start[4];
2636 xmlCharEncoding enc;
2637
Owen Taylor3473f882001-02-23 17:55:21 +00002638 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002639 * Note: external parameter entities will not be loaded, it
2640 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002641 * option of validating, or substituting entities were
2642 * given. Doing so is far more secure as the parser will
2643 * only process data coming from the document entity by
2644 * default.
2645 */
2646 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2647 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2648 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002649 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2650 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2651 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002652 (ctxt->validate == 0))
2653 return;
2654
2655 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002656 * handle the extra spaces added before and after
2657 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002658 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002659 */
2660 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002661 if (xmlPushInput(ctxt, input) < 0)
2662 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002663
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002664 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002665 * Get the 4 first bytes and decode the charset
2666 * if enc != XML_CHAR_ENCODING_NONE
2667 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002668 * Note that, since we may have some non-UTF8
2669 * encoding (like UTF16, bug 135229), the 'length'
2670 * is not known, but we can calculate based upon
2671 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002672 */
2673 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002674 if (ctxt->instate == XML_PARSER_EOF)
2675 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002676 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002677 start[0] = RAW;
2678 start[1] = NXT(1);
2679 start[2] = NXT(2);
2680 start[3] = NXT(3);
2681 enc = xmlDetectCharEncoding(start, 4);
2682 if (enc != XML_CHAR_ENCODING_NONE) {
2683 xmlSwitchEncoding(ctxt, enc);
2684 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002685 }
2686
Owen Taylor3473f882001-02-23 17:55:21 +00002687 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002688 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2689 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002690 xmlParseTextDecl(ctxt);
2691 }
Owen Taylor3473f882001-02-23 17:55:21 +00002692 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002693 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2694 "PEReference: %s is not a parameter entity\n",
2695 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002696 }
2697 }
2698 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002699 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002700 }
Owen Taylor3473f882001-02-23 17:55:21 +00002701 }
2702}
2703
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t and is updated on success.
 * mem_error: is a label expected to exist in the calling function and to
 * handle memory allocation failures (including a size computation that
 * wrapped around); buffer is left untouched when it is taken.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2718
2719/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002720 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002721 * @ctxt: the parser context
2722 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002723 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002724 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2725 * @end: an end marker xmlChar, 0 if none
2726 * @end2: an end marker xmlChar, 0 if none
2727 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002728 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002729 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002730 *
2731 * [67] Reference ::= EntityRef | CharRef
2732 *
2733 * [69] PEReference ::= '%' Name ';'
2734 *
2735 * Returns A newly allocated string with the substitution done. The caller
2736 * must deallocate it !
2737 */
2738xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002739xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2740 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002741 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002742 size_t buffer_size = 0;
2743 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002744
2745 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002746 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002747 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002748 xmlEntityPtr ent;
2749 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002750
Daniel Veillarda82b1822004-11-08 16:24:57 +00002751 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002752 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002753 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002754
Daniel Veillard0161e632008-08-28 15:36:32 +00002755 if (((ctxt->depth > 40) &&
2756 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2757 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002758 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002759 return(NULL);
2760 }
2761
2762 /*
2763 * allocate a translation buffer.
2764 */
2765 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002766 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002767 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002768
2769 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002770 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002771 * we are operating on already parsed values.
2772 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002773 if (str < last)
2774 c = CUR_SCHAR(str, l);
2775 else
2776 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002777 while ((c != 0) && (c != end) && /* non input consuming loop */
2778 (c != end2) && (c != end3)) {
2779
2780 if (c == 0) break;
2781 if ((c == '&') && (str[1] == '#')) {
2782 int val = xmlParseStringCharRef(ctxt, &str);
2783 if (val != 0) {
2784 COPY_BUF(0,buffer,nbchars,val);
2785 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002786 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002787 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002788 }
Owen Taylor3473f882001-02-23 17:55:21 +00002789 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2790 if (xmlParserDebugEntities)
2791 xmlGenericError(xmlGenericErrorContext,
2792 "String decoding Entity Reference: %.30s\n",
2793 str);
2794 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002795 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2796 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002797 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002798 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002799 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002800 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002801 if ((ent != NULL) &&
2802 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2803 if (ent->content != NULL) {
2804 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002805 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002806 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002807 }
Owen Taylor3473f882001-02-23 17:55:21 +00002808 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002809 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2810 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002811 }
2812 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002813 ctxt->depth++;
2814 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2815 0, 0, 0);
2816 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002817
David Drysdale69030712015-11-20 11:13:45 +08002818 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2819 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
2820 goto int_error;
2821
Owen Taylor3473f882001-02-23 17:55:21 +00002822 if (rep != NULL) {
2823 current = rep;
2824 while (*current != 0) { /* non input consuming loop */
2825 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002826 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002827 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002828 goto int_error;
2829 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002830 }
2831 }
2832 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002833 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002834 }
2835 } else if (ent != NULL) {
2836 int i = xmlStrlen(ent->name);
2837 const xmlChar *cur = ent->name;
2838
2839 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002840 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002841 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002842 }
2843 for (;i > 0;i--)
2844 buffer[nbchars++] = *cur++;
2845 buffer[nbchars++] = ';';
2846 }
2847 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2848 if (xmlParserDebugEntities)
2849 xmlGenericError(xmlGenericErrorContext,
2850 "String decoding PE Reference: %.30s\n", str);
2851 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002852 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2853 goto int_error;
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08002854 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002855 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002856 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002857 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002858 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002859 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002860 }
Owen Taylor3473f882001-02-23 17:55:21 +00002861 ctxt->depth++;
2862 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2863 0, 0, 0);
2864 ctxt->depth--;
2865 if (rep != NULL) {
2866 current = rep;
2867 while (*current != 0) { /* non input consuming loop */
2868 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002869 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002870 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002871 goto int_error;
2872 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 }
2874 }
2875 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002876 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002877 }
2878 }
2879 } else {
2880 COPY_BUF(l,buffer,nbchars,c);
2881 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002882 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2883 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002884 }
2885 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002886 if (str < last)
2887 c = CUR_SCHAR(str, l);
2888 else
2889 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002890 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002891 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002892 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002893
2894mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002895 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002896int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002897 if (rep != NULL)
2898 xmlFree(rep);
2899 if (buffer != NULL)
2900 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002901 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002902}
2903
Daniel Veillarde57ec792003-09-10 10:50:59 +00002904/**
2905 * xmlStringDecodeEntities:
2906 * @ctxt: the parser context
2907 * @str: the input string
2908 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2909 * @end: an end marker xmlChar, 0 if none
2910 * @end2: an end marker xmlChar, 0 if none
2911 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002912 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002913 * Takes a entity string content and process to do the adequate substitutions.
2914 *
2915 * [67] Reference ::= EntityRef | CharRef
2916 *
2917 * [69] PEReference ::= '%' Name ';'
2918 *
2919 * Returns A newly allocated string with the substitution done. The caller
2920 * must deallocate it !
2921 */
2922xmlChar *
2923xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2924 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002925 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002926 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2927 end, end2, end3));
2928}
Owen Taylor3473f882001-02-23 17:55:21 +00002929
2930/************************************************************************
2931 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002932 * Commodity functions, cleanup needed ? *
2933 * *
2934 ************************************************************************/
2935
2936/**
2937 * areBlanks:
2938 * @ctxt: an XML parser context
2939 * @str: a xmlChar *
2940 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002941 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002942 *
2943 * Is this a sequence of blank chars that one can ignore ?
2944 *
2945 * Returns 1 if ignorable 0 otherwise.
2946 */
2947
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002948static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2949 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002950 int i, ret;
2951 xmlNodePtr lastChild;
2952
Daniel Veillard05c13a22001-09-09 08:38:09 +00002953 /*
2954 * Don't spend time trying to differentiate them, the same callback is
2955 * used !
2956 */
2957 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002958 return(0);
2959
Owen Taylor3473f882001-02-23 17:55:21 +00002960 /*
2961 * Check for xml:space value.
2962 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002963 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2964 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002965 return(0);
2966
2967 /*
2968 * Check that the string is made of blanks
2969 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002970 if (blank_chars == 0) {
2971 for (i = 0;i < len;i++)
2972 if (!(IS_BLANK_CH(str[i]))) return(0);
2973 }
Owen Taylor3473f882001-02-23 17:55:21 +00002974
2975 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002976 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002977 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002978 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002979 if (ctxt->myDoc != NULL) {
2980 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2981 if (ret == 0) return(1);
2982 if (ret == 1) return(0);
2983 }
2984
2985 /*
2986 * Otherwise, heuristic :-\
2987 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002988 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002989 if ((ctxt->node->children == NULL) &&
2990 (RAW == '<') && (NXT(1) == '/')) return(0);
2991
2992 lastChild = xmlGetLastChild(ctxt->node);
2993 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002994 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2995 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 } else if (xmlNodeIsText(lastChild))
2997 return(0);
2998 else if ((ctxt->node->children != NULL) &&
2999 (xmlNodeIsText(ctxt->node->children)))
3000 return(0);
3001 return(1);
3002}
3003
Owen Taylor3473f882001-02-23 17:55:21 +00003004/************************************************************************
3005 * *
3006 * Extra stuff for namespace support *
3007 * Relates to http://www.w3.org/TR/WD-xml-names *
3008 * *
3009 ************************************************************************/
3010
3011/**
3012 * xmlSplitQName:
3013 * @ctxt: an XML parser context
3014 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003015 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00003016 *
3017 * parse an UTF8 encoded XML qualified name string
3018 *
3019 * [NS 5] QName ::= (Prefix ':')? LocalPart
3020 *
3021 * [NS 6] Prefix ::= NCName
3022 *
3023 * [NS 7] LocalPart ::= NCName
3024 *
3025 * Returns the local part, and prefix is updated
3026 * to get the Prefix if any.
3027 */
3028
3029xmlChar *
3030xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3031 xmlChar buf[XML_MAX_NAMELEN + 5];
3032 xmlChar *buffer = NULL;
3033 int len = 0;
3034 int max = XML_MAX_NAMELEN;
3035 xmlChar *ret = NULL;
3036 const xmlChar *cur = name;
3037 int c;
3038
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003039 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003040 *prefix = NULL;
3041
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00003042 if (cur == NULL) return(NULL);
3043
Owen Taylor3473f882001-02-23 17:55:21 +00003044#ifndef XML_XML_NAMESPACE
3045 /* xml: prefix is not really a namespace */
3046 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3047 (cur[2] == 'l') && (cur[3] == ':'))
3048 return(xmlStrdup(name));
3049#endif
3050
Daniel Veillard597bc482003-07-24 16:08:28 +00003051 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00003052 if (cur[0] == ':')
3053 return(xmlStrdup(name));
3054
3055 c = *cur++;
3056 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3057 buf[len++] = c;
3058 c = *cur++;
3059 }
3060 if (len >= max) {
3061 /*
3062 * Okay someone managed to make a huge name, so he's ready to pay
3063 * for the processing speed.
3064 */
3065 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003066
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003067 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003068 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003069 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003070 return(NULL);
3071 }
3072 memcpy(buffer, buf, len);
3073 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3074 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003075 xmlChar *tmp;
3076
Owen Taylor3473f882001-02-23 17:55:21 +00003077 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003078 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003079 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003080 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003081 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003082 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003083 return(NULL);
3084 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003085 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003086 }
3087 buffer[len++] = c;
3088 c = *cur++;
3089 }
3090 buffer[len] = 0;
3091 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003092
Daniel Veillard597bc482003-07-24 16:08:28 +00003093 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003094 if (buffer != NULL)
3095 xmlFree(buffer);
3096 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003097 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003098 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003099
Owen Taylor3473f882001-02-23 17:55:21 +00003100 if (buffer == NULL)
3101 ret = xmlStrndup(buf, len);
3102 else {
3103 ret = buffer;
3104 buffer = NULL;
3105 max = XML_MAX_NAMELEN;
3106 }
3107
3108
3109 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003110 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003111 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003112 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003113 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003114 }
Owen Taylor3473f882001-02-23 17:55:21 +00003115 len = 0;
3116
Daniel Veillardbb284f42002-10-16 18:02:47 +00003117 /*
3118 * Check that the first character is proper to start
3119 * a new name
3120 */
3121 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3122 ((c >= 0x41) && (c <= 0x5A)) ||
3123 (c == '_') || (c == ':'))) {
3124 int l;
3125 int first = CUR_SCHAR(cur, l);
3126
3127 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003128 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003129 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003130 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003131 }
3132 }
3133 cur++;
3134
Owen Taylor3473f882001-02-23 17:55:21 +00003135 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3136 buf[len++] = c;
3137 c = *cur++;
3138 }
3139 if (len >= max) {
3140 /*
3141 * Okay someone managed to make a huge name, so he's ready to pay
3142 * for the processing speed.
3143 */
3144 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003145
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003146 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003147 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003148 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003149 return(NULL);
3150 }
3151 memcpy(buffer, buf, len);
3152 while (c != 0) { /* tested bigname2.xml */
3153 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003154 xmlChar *tmp;
3155
Owen Taylor3473f882001-02-23 17:55:21 +00003156 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003157 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003158 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003159 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003160 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003161 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003162 return(NULL);
3163 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003164 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003165 }
3166 buffer[len++] = c;
3167 c = *cur++;
3168 }
3169 buffer[len] = 0;
3170 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003171
Owen Taylor3473f882001-02-23 17:55:21 +00003172 if (buffer == NULL)
3173 ret = xmlStrndup(buf, len);
3174 else {
3175 ret = buffer;
3176 }
3177 }
3178
3179 return(ret);
3180}
3181
3182/************************************************************************
3183 * *
3184 * The parser itself *
3185 * Relates to http://www.w3.org/TR/REC-xml *
3186 * *
3187 ************************************************************************/
3188
Daniel Veillard34e3f642008-07-29 09:02:27 +00003189/************************************************************************
3190 * *
3191 * Routines to parse Name, NCName and NmToken *
3192 * *
3193 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters of the name parsing routines, only compiled in when DEBUG
 * is defined. Being static objects they start at zero without needing an
 * explicit initializer. Each routine bumps its own counter on entry
 * (nbParseNmToken is presumably handled by the NmToken parser further
 * down in this file).
 */
static unsigned long nbParseName;
static unsigned long nbParseNameComplex;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNmToken;
static unsigned long nbParseStringName;
#endif

3202
Daniel Veillard34e3f642008-07-29 09:02:27 +00003203/*
3204 * The two following functions are related to the change of accepted
3205 * characters for Name and NmToken in the Revision 5 of XML-1.0
3206 * They correspond to the modified production [4] and the new production [4a]
3207 * changes in that revision. Also note that the macros used for the
3208 * productions Letter, Digit, CombiningChar and Extender are not needed
3209 * anymore.
3210 * We still keep compatibility to pre-revision5 parsing semantic if the
3211 * new XML_PARSE_OLD10 option is given to the parser.
3212 */
3213static int
3214xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3215 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3216 /*
3217 * Use the new checks of production [4] [4a] amd [5] of the
3218 * Update 5 of XML-1.0
3219 */
3220 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3221 (((c >= 'a') && (c <= 'z')) ||
3222 ((c >= 'A') && (c <= 'Z')) ||
3223 (c == '_') || (c == ':') ||
3224 ((c >= 0xC0) && (c <= 0xD6)) ||
3225 ((c >= 0xD8) && (c <= 0xF6)) ||
3226 ((c >= 0xF8) && (c <= 0x2FF)) ||
3227 ((c >= 0x370) && (c <= 0x37D)) ||
3228 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3229 ((c >= 0x200C) && (c <= 0x200D)) ||
3230 ((c >= 0x2070) && (c <= 0x218F)) ||
3231 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3232 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3233 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3234 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3235 ((c >= 0x10000) && (c <= 0xEFFFF))))
3236 return(1);
3237 } else {
3238 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3239 return(1);
3240 }
3241 return(0);
3242}
3243
3244static int
3245xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3246 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3247 /*
3248 * Use the new checks of production [4] [4a] amd [5] of the
3249 * Update 5 of XML-1.0
3250 */
3251 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3252 (((c >= 'a') && (c <= 'z')) ||
3253 ((c >= 'A') && (c <= 'Z')) ||
3254 ((c >= '0') && (c <= '9')) || /* !start */
3255 (c == '_') || (c == ':') ||
3256 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3257 ((c >= 0xC0) && (c <= 0xD6)) ||
3258 ((c >= 0xD8) && (c <= 0xF6)) ||
3259 ((c >= 0xF8) && (c <= 0x2FF)) ||
3260 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3261 ((c >= 0x370) && (c <= 0x37D)) ||
3262 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3263 ((c >= 0x200C) && (c <= 0x200D)) ||
3264 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3265 ((c >= 0x2070) && (c <= 0x218F)) ||
3266 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3267 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3268 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3269 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3270 ((c >= 0x10000) && (c <= 0xEFFFF))))
3271 return(1);
3272 } else {
3273 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3274 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003275 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003276 (IS_COMBINING(c)) ||
3277 (IS_EXTENDER(c)))
3278 return(1);
3279 }
3280 return(0);
3281}
3282
Daniel Veillarde57ec792003-09-10 10:50:59 +00003283static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003284 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003285
Daniel Veillard34e3f642008-07-29 09:02:27 +00003286static const xmlChar *
3287xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3288 int len = 0, l;
3289 int c;
3290 int count = 0;
3291
Daniel Veillardc6561462009-03-25 10:22:31 +00003292#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003293 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003294#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003295
3296 /*
3297 * Handler for more complex cases
3298 */
3299 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003300 if (ctxt->instate == XML_PARSER_EOF)
3301 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003302 c = CUR_CHAR(l);
3303 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3304 /*
3305 * Use the new checks of production [4] [4a] amd [5] of the
3306 * Update 5 of XML-1.0
3307 */
3308 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3309 (!(((c >= 'a') && (c <= 'z')) ||
3310 ((c >= 'A') && (c <= 'Z')) ||
3311 (c == '_') || (c == ':') ||
3312 ((c >= 0xC0) && (c <= 0xD6)) ||
3313 ((c >= 0xD8) && (c <= 0xF6)) ||
3314 ((c >= 0xF8) && (c <= 0x2FF)) ||
3315 ((c >= 0x370) && (c <= 0x37D)) ||
3316 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3317 ((c >= 0x200C) && (c <= 0x200D)) ||
3318 ((c >= 0x2070) && (c <= 0x218F)) ||
3319 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3320 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3321 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3322 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3323 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3324 return(NULL);
3325 }
3326 len += l;
3327 NEXTL(l);
3328 c = CUR_CHAR(l);
3329 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3330 (((c >= 'a') && (c <= 'z')) ||
3331 ((c >= 'A') && (c <= 'Z')) ||
3332 ((c >= '0') && (c <= '9')) || /* !start */
3333 (c == '_') || (c == ':') ||
3334 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3335 ((c >= 0xC0) && (c <= 0xD6)) ||
3336 ((c >= 0xD8) && (c <= 0xF6)) ||
3337 ((c >= 0xF8) && (c <= 0x2FF)) ||
3338 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3339 ((c >= 0x370) && (c <= 0x37D)) ||
3340 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3341 ((c >= 0x200C) && (c <= 0x200D)) ||
3342 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3343 ((c >= 0x2070) && (c <= 0x218F)) ||
3344 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3345 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3346 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3347 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3348 ((c >= 0x10000) && (c <= 0xEFFFF))
3349 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003350 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003351 count = 0;
3352 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003353 if (ctxt->instate == XML_PARSER_EOF)
3354 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003355 }
3356 len += l;
3357 NEXTL(l);
3358 c = CUR_CHAR(l);
3359 }
3360 } else {
3361 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3362 (!IS_LETTER(c) && (c != '_') &&
3363 (c != ':'))) {
3364 return(NULL);
3365 }
3366 len += l;
3367 NEXTL(l);
3368 c = CUR_CHAR(l);
3369
3370 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3371 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3372 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003373 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003374 (IS_COMBINING(c)) ||
3375 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003376 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003377 count = 0;
3378 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003379 if (ctxt->instate == XML_PARSER_EOF)
3380 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381 }
3382 len += l;
3383 NEXTL(l);
3384 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003385 if (c == 0) {
3386 count = 0;
3387 GROW;
3388 if (ctxt->instate == XML_PARSER_EOF)
3389 return(NULL);
3390 c = CUR_CHAR(l);
3391 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003392 }
3393 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003394 if ((len > XML_MAX_NAME_LENGTH) &&
3395 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3396 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3397 return(NULL);
3398 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003399 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3400 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3401 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3402}
3403
Owen Taylor3473f882001-02-23 17:55:21 +00003404/**
3405 * xmlParseName:
3406 * @ctxt: an XML parser context
3407 *
3408 * parse an XML name.
3409 *
3410 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3411 * CombiningChar | Extender
3412 *
3413 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3414 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003415 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003416 *
3417 * Returns the Name parsed or NULL
3418 */
3419
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003420const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003421xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003422 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003423 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003424 int count = 0;
3425
3426 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003427
Daniel Veillardc6561462009-03-25 10:22:31 +00003428#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003429 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003430#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003431
Daniel Veillard48b2f892001-02-25 16:11:03 +00003432 /*
3433 * Accelerator for simple ASCII names
3434 */
3435 in = ctxt->input->cur;
3436 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3437 ((*in >= 0x41) && (*in <= 0x5A)) ||
3438 (*in == '_') || (*in == ':')) {
3439 in++;
3440 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3441 ((*in >= 0x41) && (*in <= 0x5A)) ||
3442 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003443 (*in == '_') || (*in == '-') ||
3444 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003445 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003446 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003447 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003448 if ((count > XML_MAX_NAME_LENGTH) &&
3449 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3450 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3451 return(NULL);
3452 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003453 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003454 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003455 ctxt->nbChars += count;
3456 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003457 if (ret == NULL)
3458 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003459 return(ret);
3460 }
3461 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003462 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003463 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003464}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003465
Daniel Veillard34e3f642008-07-29 09:02:27 +00003466static const xmlChar *
3467xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3468 int len = 0, l;
3469 int c;
3470 int count = 0;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003471 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
Daniel Veillard34e3f642008-07-29 09:02:27 +00003472
Daniel Veillardc6561462009-03-25 10:22:31 +00003473#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003474 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003475#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003476
3477 /*
3478 * Handler for more complex cases
3479 */
3480 GROW;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003481 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003482 c = CUR_CHAR(l);
3483 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3484 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3485 return(NULL);
3486 }
3487
3488 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3489 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003490 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003491 if ((len > XML_MAX_NAME_LENGTH) &&
3492 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3493 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3494 return(NULL);
3495 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003496 count = 0;
3497 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003498 if (ctxt->instate == XML_PARSER_EOF)
3499 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003500 }
3501 len += l;
3502 NEXTL(l);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003503 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003505 if (c == 0) {
3506 count = 0;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003507 /*
3508 * when shrinking to extend the buffer we really need to preserve
3509 * the part of the name we already parsed. Hence rolling back
3510 * by current lenght.
3511 */
3512 ctxt->input->cur -= l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003513 GROW;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003514 ctxt->input->cur += l;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003515 if (ctxt->instate == XML_PARSER_EOF)
3516 return(NULL);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003517 end = ctxt->input->cur;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003518 c = CUR_CHAR(l);
3519 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003520 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003521 if ((len > XML_MAX_NAME_LENGTH) &&
3522 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3523 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3524 return(NULL);
3525 }
Daniel Veillarddcc19502013-05-22 22:56:45 +02003526 return(xmlDictLookup(ctxt->dict, end - len, len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003527}
3528
3529/**
3530 * xmlParseNCName:
3531 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003532 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003533 *
3534 * parse an XML name.
3535 *
3536 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3537 * CombiningChar | Extender
3538 *
3539 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3540 *
3541 * Returns the Name parsed or NULL
3542 */
3543
3544static const xmlChar *
3545xmlParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillard51f02b02015-09-15 16:50:32 +08003546 const xmlChar *in, *e;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003547 const xmlChar *ret;
3548 int count = 0;
3549
Daniel Veillardc6561462009-03-25 10:22:31 +00003550#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003551 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003552#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003553
3554 /*
3555 * Accelerator for simple ASCII names
3556 */
3557 in = ctxt->input->cur;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003558 e = ctxt->input->end;
3559 if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3560 ((*in >= 0x41) && (*in <= 0x5A)) ||
3561 (*in == '_')) && (in < e)) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003562 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003563 while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3564 ((*in >= 0x41) && (*in <= 0x5A)) ||
3565 ((*in >= 0x30) && (*in <= 0x39)) ||
3566 (*in == '_') || (*in == '-') ||
3567 (*in == '.')) && (in < e))
Daniel Veillard34e3f642008-07-29 09:02:27 +00003568 in++;
Daniel Veillard51f02b02015-09-15 16:50:32 +08003569 if (in >= e)
3570 goto complex;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003571 if ((*in > 0) && (*in < 0x80)) {
3572 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003573 if ((count > XML_MAX_NAME_LENGTH) &&
3574 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3575 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3576 return(NULL);
3577 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003578 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3579 ctxt->input->cur = in;
3580 ctxt->nbChars += count;
3581 ctxt->input->col += count;
3582 if (ret == NULL) {
3583 xmlErrMemory(ctxt, NULL);
3584 }
3585 return(ret);
3586 }
3587 }
Daniel Veillard51f02b02015-09-15 16:50:32 +08003588complex:
Daniel Veillard34e3f642008-07-29 09:02:27 +00003589 return(xmlParseNCNameComplex(ctxt));
3590}
3591
Daniel Veillard46de64e2002-05-29 08:21:33 +00003592/**
3593 * xmlParseNameAndCompare:
3594 * @ctxt: an XML parser context
3595 *
3596 * parse an XML name and compares for match
3597 * (specialized for endtag parsing)
3598 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003599 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3600 * and the name for mismatch
3601 */
3602
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003603static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003604xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003605 register const xmlChar *cmp = other;
3606 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003607 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003608
3609 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003610 if (ctxt->instate == XML_PARSER_EOF)
3611 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003612
Daniel Veillard46de64e2002-05-29 08:21:33 +00003613 in = ctxt->input->cur;
3614 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003615 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003616 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003617 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003618 }
William M. Brack76e95df2003-10-18 16:20:14 +00003619 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003620 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003621 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003622 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003623 }
3624 /* failure (or end of input buffer), check with full function */
3625 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003626 /* strings coming from the dictionnary direct compare possible */
3627 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003628 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003629 }
3630 return ret;
3631}
3632
Owen Taylor3473f882001-02-23 17:55:21 +00003633/**
3634 * xmlParseStringName:
3635 * @ctxt: an XML parser context
3636 * @str: a pointer to the string pointer (IN/OUT)
3637 *
3638 * parse an XML name.
3639 *
3640 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3641 * CombiningChar | Extender
3642 *
3643 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3644 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003645 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003646 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003647 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003648 * is updated to the current location in the string.
3649 */
3650
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003651static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003652xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3653 xmlChar buf[XML_MAX_NAMELEN + 5];
3654 const xmlChar *cur = *str;
3655 int len = 0, l;
3656 int c;
3657
Daniel Veillardc6561462009-03-25 10:22:31 +00003658#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003659 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003660#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003661
Owen Taylor3473f882001-02-23 17:55:21 +00003662 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003663 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003664 return(NULL);
3665 }
3666
Daniel Veillard34e3f642008-07-29 09:02:27 +00003667 COPY_BUF(l,buf,len,c);
3668 cur += l;
3669 c = CUR_SCHAR(cur, l);
3670 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003671 COPY_BUF(l,buf,len,c);
3672 cur += l;
3673 c = CUR_SCHAR(cur, l);
3674 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3675 /*
3676 * Okay someone managed to make a huge name, so he's ready to pay
3677 * for the processing speed.
3678 */
3679 xmlChar *buffer;
3680 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003681
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003682 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003683 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003684 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003685 return(NULL);
3686 }
3687 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003688 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003689 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003690 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003691
3692 if ((len > XML_MAX_NAME_LENGTH) &&
3693 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3694 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3695 xmlFree(buffer);
3696 return(NULL);
3697 }
Owen Taylor3473f882001-02-23 17:55:21 +00003698 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003699 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003700 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003701 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003702 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003703 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003704 return(NULL);
3705 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003706 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003707 }
3708 COPY_BUF(l,buffer,len,c);
3709 cur += l;
3710 c = CUR_SCHAR(cur, l);
3711 }
3712 buffer[len] = 0;
3713 *str = cur;
3714 return(buffer);
3715 }
3716 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003717 if ((len > XML_MAX_NAME_LENGTH) &&
3718 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3719 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3720 return(NULL);
3721 }
Owen Taylor3473f882001-02-23 17:55:21 +00003722 *str = cur;
3723 return(xmlStrndup(buf, len));
3724}
3725
3726/**
3727 * xmlParseNmtoken:
3728 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003729 *
Owen Taylor3473f882001-02-23 17:55:21 +00003730 * parse an XML Nmtoken.
3731 *
3732 * [7] Nmtoken ::= (NameChar)+
3733 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003734 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003735 *
3736 * Returns the Nmtoken parsed or NULL
3737 */
3738
3739xmlChar *
3740xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3741 xmlChar buf[XML_MAX_NAMELEN + 5];
3742 int len = 0, l;
3743 int c;
3744 int count = 0;
3745
Daniel Veillardc6561462009-03-25 10:22:31 +00003746#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003747 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003748#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003749
Owen Taylor3473f882001-02-23 17:55:21 +00003750 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003751 if (ctxt->instate == XML_PARSER_EOF)
3752 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003753 c = CUR_CHAR(l);
3754
Daniel Veillard34e3f642008-07-29 09:02:27 +00003755 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003756 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003757 count = 0;
3758 GROW;
3759 }
3760 COPY_BUF(l,buf,len,c);
3761 NEXTL(l);
3762 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003763 if (c == 0) {
3764 count = 0;
3765 GROW;
3766 if (ctxt->instate == XML_PARSER_EOF)
3767 return(NULL);
3768 c = CUR_CHAR(l);
3769 }
Owen Taylor3473f882001-02-23 17:55:21 +00003770 if (len >= XML_MAX_NAMELEN) {
3771 /*
3772 * Okay someone managed to make a huge token, so he's ready to pay
3773 * for the processing speed.
3774 */
3775 xmlChar *buffer;
3776 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003777
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003778 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003779 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003780 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003781 return(NULL);
3782 }
3783 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003784 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003785 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003786 count = 0;
3787 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003788 if (ctxt->instate == XML_PARSER_EOF) {
3789 xmlFree(buffer);
3790 return(NULL);
3791 }
Owen Taylor3473f882001-02-23 17:55:21 +00003792 }
3793 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003794 xmlChar *tmp;
3795
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003796 if ((max > XML_MAX_NAME_LENGTH) &&
3797 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3798 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3799 xmlFree(buffer);
3800 return(NULL);
3801 }
Owen Taylor3473f882001-02-23 17:55:21 +00003802 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003803 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003804 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003805 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003806 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003807 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003808 return(NULL);
3809 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003810 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003811 }
3812 COPY_BUF(l,buffer,len,c);
3813 NEXTL(l);
3814 c = CUR_CHAR(l);
3815 }
3816 buffer[len] = 0;
3817 return(buffer);
3818 }
3819 }
3820 if (len == 0)
3821 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003822 if ((len > XML_MAX_NAME_LENGTH) &&
3823 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3824 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3825 return(NULL);
3826 }
Owen Taylor3473f882001-02-23 17:55:21 +00003827 return(xmlStrndup(buf, len));
3828}
3829
3830/**
3831 * xmlParseEntityValue:
3832 * @ctxt: an XML parser context
3833 * @orig: if non-NULL store a copy of the original entity value
3834 *
3835 * parse a value for ENTITY declarations
3836 *
3837 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3838 * "'" ([^%&'] | PEReference | Reference)* "'"
3839 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003840 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003841 */
3842
3843xmlChar *
3844xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3845 xmlChar *buf = NULL;
3846 int len = 0;
3847 int size = XML_PARSER_BUFFER_SIZE;
3848 int c, l;
3849 xmlChar stop;
3850 xmlChar *ret = NULL;
3851 const xmlChar *cur = NULL;
3852 xmlParserInputPtr input;
3853
3854 if (RAW == '"') stop = '"';
3855 else if (RAW == '\'') stop = '\'';
3856 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003857 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003858 return(NULL);
3859 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003860 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003861 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003862 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003863 return(NULL);
3864 }
3865
3866 /*
3867 * The content of the entity definition is copied in a buffer.
3868 */
3869
3870 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3871 input = ctxt->input;
3872 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003873 if (ctxt->instate == XML_PARSER_EOF) {
3874 xmlFree(buf);
3875 return(NULL);
3876 }
Owen Taylor3473f882001-02-23 17:55:21 +00003877 NEXT;
3878 c = CUR_CHAR(l);
3879 /*
3880 * NOTE: 4.4.5 Included in Literal
3881 * When a parameter entity reference appears in a literal entity
3882 * value, ... a single or double quote character in the replacement
3883 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003884 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003885 * In practice it means we stop the loop only when back at parsing
3886 * the initial entity and the quote is found
3887 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003888 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3889 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003890 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003891 xmlChar *tmp;
3892
Owen Taylor3473f882001-02-23 17:55:21 +00003893 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003894 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3895 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003896 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003897 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003898 return(NULL);
3899 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003900 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003901 }
3902 COPY_BUF(l,buf,len,c);
3903 NEXTL(l);
3904 /*
3905 * Pop-up of finished entities.
3906 */
3907 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3908 xmlPopInput(ctxt);
3909
3910 GROW;
3911 c = CUR_CHAR(l);
3912 if (c == 0) {
3913 GROW;
3914 c = CUR_CHAR(l);
3915 }
3916 }
3917 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003918 if (ctxt->instate == XML_PARSER_EOF) {
3919 xmlFree(buf);
3920 return(NULL);
3921 }
Owen Taylor3473f882001-02-23 17:55:21 +00003922
3923 /*
3924 * Raise problem w.r.t. '&' and '%' being used in non-entities
3925 * reference constructs. Note Charref will be handled in
3926 * xmlStringDecodeEntities()
3927 */
3928 cur = buf;
3929 while (*cur != 0) { /* non input consuming */
3930 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3931 xmlChar *name;
3932 xmlChar tmp = *cur;
3933
3934 cur++;
3935 name = xmlParseStringName(ctxt, &cur);
3936 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003937 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003938 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003939 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003941 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3942 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003943 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003944 }
3945 if (name != NULL)
3946 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003947 if (*cur == 0)
3948 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003949 }
3950 cur++;
3951 }
3952
3953 /*
3954 * Then PEReference entities are substituted.
3955 */
3956 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003957 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003958 xmlFree(buf);
3959 } else {
3960 NEXT;
3961 /*
3962 * NOTE: 4.4.7 Bypassed
3963 * When a general entity reference appears in the EntityValue in
3964 * an entity declaration, it is bypassed and left as is.
3965 * so XML_SUBSTITUTE_REF is not set here.
3966 */
3967 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3968 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003969 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003970 *orig = buf;
3971 else
3972 xmlFree(buf);
3973 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003974
Owen Taylor3473f882001-02-23 17:55:21 +00003975 return(ret);
3976}
3977
3978/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003979 * xmlParseAttValueComplex:
3980 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003981 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003982 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003983 *
3984 * parse a value for an attribute, this is the fallback function
3985 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003986 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003987 *
3988 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3989 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003990static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003991xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003992 xmlChar limit = 0;
3993 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003994 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003995 size_t len = 0;
3996 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003997 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003998 xmlChar *current = NULL;
3999 xmlEntityPtr ent;
4000
Owen Taylor3473f882001-02-23 17:55:21 +00004001 if (NXT(0) == '"') {
4002 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4003 limit = '"';
4004 NEXT;
4005 } else if (NXT(0) == '\'') {
4006 limit = '\'';
4007 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4008 NEXT;
4009 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004010 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004011 return(NULL);
4012 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00004013
Owen Taylor3473f882001-02-23 17:55:21 +00004014 /*
4015 * allocate a translation buffer.
4016 */
4017 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004018 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004019 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00004020
4021 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004022 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00004023 */
4024 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004025 while (((NXT(0) != limit) && /* checked */
4026 (IS_CHAR(c)) && (c != '<')) &&
4027 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08004028 /*
4029 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
4030 * special option is given
4031 */
4032 if ((len > XML_MAX_TEXT_LENGTH) &&
4033 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4034 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004035 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08004036 goto mem_error;
4037 }
Owen Taylor3473f882001-02-23 17:55:21 +00004038 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00004039 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00004040 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004041 if (NXT(1) == '#') {
4042 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004043
Owen Taylor3473f882001-02-23 17:55:21 +00004044 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00004045 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004046 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004047 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004048 }
4049 buf[len++] = '&';
4050 } else {
4051 /*
4052 * The reparsing will be done in xmlStringGetNodeList()
4053 * called by the attribute() function in SAX.c
4054 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004055 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004056 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004057 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004058 buf[len++] = '&';
4059 buf[len++] = '#';
4060 buf[len++] = '3';
4061 buf[len++] = '8';
4062 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004063 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004064 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004065 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004066 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004067 }
Owen Taylor3473f882001-02-23 17:55:21 +00004068 len += xmlCopyChar(0, &buf[len], val);
4069 }
4070 } else {
4071 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004072 ctxt->nbentities++;
4073 if (ent != NULL)
4074 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004075 if ((ent != NULL) &&
4076 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004077 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004078 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004079 }
4080 if ((ctxt->replaceEntities == 0) &&
4081 (ent->content[0] == '&')) {
4082 buf[len++] = '&';
4083 buf[len++] = '#';
4084 buf[len++] = '3';
4085 buf[len++] = '8';
4086 buf[len++] = ';';
4087 } else {
4088 buf[len++] = ent->content[0];
4089 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004090 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004091 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004092 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4093 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004094 XML_SUBSTITUTE_REF,
4095 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004096 if (rep != NULL) {
4097 current = rep;
4098 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004099 if ((*current == 0xD) || (*current == 0xA) ||
4100 (*current == 0x9)) {
4101 buf[len++] = 0x20;
4102 current++;
4103 } else
4104 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004105 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004106 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004107 }
4108 }
4109 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004110 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004111 }
4112 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004113 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004114 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004115 }
Owen Taylor3473f882001-02-23 17:55:21 +00004116 if (ent->content != NULL)
4117 buf[len++] = ent->content[0];
4118 }
4119 } else if (ent != NULL) {
4120 int i = xmlStrlen(ent->name);
4121 const xmlChar *cur = ent->name;
4122
4123 /*
4124 * This may look absurd but is needed to detect
4125 * entities problems
4126 */
4127 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004128 (ent->content != NULL) && (ent->checked == 0)) {
4129 unsigned long oldnbent = ctxt->nbentities;
4130
Owen Taylor3473f882001-02-23 17:55:21 +00004131 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004132 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004133
Daniel Veillardcff25462013-03-11 15:57:55 +08004134 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004135 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004136 if (xmlStrchr(rep, '<'))
4137 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004138 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004139 rep = NULL;
4140 }
Owen Taylor3473f882001-02-23 17:55:21 +00004141 }
4142
4143 /*
4144 * Just output the reference
4145 */
4146 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004147 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004148 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004149 }
4150 for (;i > 0;i--)
4151 buf[len++] = *cur++;
4152 buf[len++] = ';';
4153 }
4154 }
4155 } else {
4156 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004157 if ((len != 0) || (!normalize)) {
4158 if ((!normalize) || (!in_space)) {
4159 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004160 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004161 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004162 }
4163 }
4164 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004165 }
4166 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004167 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004168 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004169 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004170 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004171 }
4172 }
4173 NEXTL(l);
4174 }
4175 GROW;
4176 c = CUR_CHAR(l);
4177 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004178 if (ctxt->instate == XML_PARSER_EOF)
4179 goto error;
4180
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004181 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004182 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004183 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004184 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004185 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004186 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004187 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004188 if ((c != 0) && (!IS_CHAR(c))) {
4189 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4190 "invalid character in attribute value\n");
4191 } else {
4192 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4193 "AttValue: ' expected\n");
4194 }
Owen Taylor3473f882001-02-23 17:55:21 +00004195 } else
4196 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004197
4198 /*
4199 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004200 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004201 */
4202 if (len >= INT_MAX) {
4203 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004204 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004205 goto mem_error;
4206 }
4207
4208 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004209 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004210
4211mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004212 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004213error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004214 if (buf != NULL)
4215 xmlFree(buf);
4216 if (rep != NULL)
4217 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004218 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004219}
4220
4221/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004222 * xmlParseAttValue:
4223 * @ctxt: an XML parser context
4224 *
4225 * parse a value for an attribute
4226 * Note: the parser won't do substitution of entities here, this
4227 * will be handled later in xmlStringGetNodeList
4228 *
4229 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4230 * "'" ([^<&'] | Reference)* "'"
4231 *
4232 * 3.3.3 Attribute-Value Normalization:
4233 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004234 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004235 * - a character reference is processed by appending the referenced
4236 * character to the attribute value
4237 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004238 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004239 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4240 * appending #x20 to the normalized value, except that only a single
4241 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004242 * parsed entity or the literal entity value of an internal parsed entity
4243 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004244 * If the declared value is not CDATA, then the XML processor must further
4245 * process the normalized attribute value by discarding any leading and
4246 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004247 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004248 * All attributes for which no declaration has been read should be treated
4249 * by a non-validating parser as if declared CDATA.
4250 *
4251 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4252 */
4253
4254
4255xmlChar *
4256xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004257 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004258 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004259}
4260
4261/**
Owen Taylor3473f882001-02-23 17:55:21 +00004262 * xmlParseSystemLiteral:
4263 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004264 *
Owen Taylor3473f882001-02-23 17:55:21 +00004265 * parse an XML Literal
4266 *
4267 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4268 *
4269 * Returns the SystemLiteral parsed or NULL
4270 */
4271
4272xmlChar *
4273xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4274 xmlChar *buf = NULL;
4275 int len = 0;
4276 int size = XML_PARSER_BUFFER_SIZE;
4277 int cur, l;
4278 xmlChar stop;
4279 int state = ctxt->instate;
4280 int count = 0;
4281
4282 SHRINK;
4283 if (RAW == '"') {
4284 NEXT;
4285 stop = '"';
4286 } else if (RAW == '\'') {
4287 NEXT;
4288 stop = '\'';
4289 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 return(NULL);
4292 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004293
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004294 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004295 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004296 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004297 return(NULL);
4298 }
4299 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4300 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004301 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004302 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004303 xmlChar *tmp;
4304
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004305 if ((size > XML_MAX_NAME_LENGTH) &&
4306 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4307 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4308 xmlFree(buf);
4309 ctxt->instate = (xmlParserInputState) state;
4310 return(NULL);
4311 }
Owen Taylor3473f882001-02-23 17:55:21 +00004312 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004313 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4314 if (tmp == NULL) {
4315 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004316 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004317 ctxt->instate = (xmlParserInputState) state;
4318 return(NULL);
4319 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004320 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004321 }
4322 count++;
4323 if (count > 50) {
4324 GROW;
4325 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004326 if (ctxt->instate == XML_PARSER_EOF) {
4327 xmlFree(buf);
4328 return(NULL);
4329 }
Owen Taylor3473f882001-02-23 17:55:21 +00004330 }
4331 COPY_BUF(l,buf,len,cur);
4332 NEXTL(l);
4333 cur = CUR_CHAR(l);
4334 if (cur == 0) {
4335 GROW;
4336 SHRINK;
4337 cur = CUR_CHAR(l);
4338 }
4339 }
4340 buf[len] = 0;
4341 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004342 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004343 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004344 } else {
4345 NEXT;
4346 }
4347 return(buf);
4348}
4349
4350/**
4351 * xmlParsePubidLiteral:
4352 * @ctxt: an XML parser context
4353 *
4354 * parse an XML public literal
4355 *
4356 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4357 *
4358 * Returns the PubidLiteral parsed or NULL.
4359 */
4360
4361xmlChar *
4362xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4363 xmlChar *buf = NULL;
4364 int len = 0;
4365 int size = XML_PARSER_BUFFER_SIZE;
4366 xmlChar cur;
4367 xmlChar stop;
4368 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004369 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004370
4371 SHRINK;
4372 if (RAW == '"') {
4373 NEXT;
4374 stop = '"';
4375 } else if (RAW == '\'') {
4376 NEXT;
4377 stop = '\'';
4378 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004379 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004380 return(NULL);
4381 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004382 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004383 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004384 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004385 return(NULL);
4386 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004387 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004388 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004389 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004390 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004391 xmlChar *tmp;
4392
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004393 if ((size > XML_MAX_NAME_LENGTH) &&
4394 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4395 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4396 xmlFree(buf);
4397 return(NULL);
4398 }
Owen Taylor3473f882001-02-23 17:55:21 +00004399 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004400 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4401 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004402 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004403 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004404 return(NULL);
4405 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004406 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004407 }
4408 buf[len++] = cur;
4409 count++;
4410 if (count > 50) {
4411 GROW;
4412 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004413 if (ctxt->instate == XML_PARSER_EOF) {
4414 xmlFree(buf);
4415 return(NULL);
4416 }
Owen Taylor3473f882001-02-23 17:55:21 +00004417 }
4418 NEXT;
4419 cur = CUR;
4420 if (cur == 0) {
4421 GROW;
4422 SHRINK;
4423 cur = CUR;
4424 }
4425 }
4426 buf[len] = 0;
4427 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004428 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004429 } else {
4430 NEXT;
4431 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004432 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004433 return(buf);
4434}
4435
Daniel Veillard8ed10722009-08-20 19:17:36 +02004436static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004437
/*
 * Lookup table driving the inner scanning loop of the char data fast
 * path: the loop keeps advancing while test_char_data[byte] is non-zero.
 * An entry is either 0 or the byte value itself. Entries are zero for
 * the control characters other than 0x09 (so CR and LF are handled
 * separately), for '&', '<' and ']', and for every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 and above: non-ascii, never accepted here */
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4475
Owen Taylor3473f882001-02-23 17:55:21 +00004476/**
4477 * xmlParseCharData:
4478 * @ctxt: an XML parser context
4479 * @cdata: int indicating whether we are within a CDATA section
4480 *
4481 * parse a CharData section.
4482 * if we are within a CDATA section ']]>' marks an end of section.
4483 *
4484 * The right angle bracket (>) may be represented using the string "&gt;",
4485 * and must, for compatibility, be escaped using "&gt;" or a character
4486 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004487 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004488 *
4489 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4490 */
4491
4492void
4493xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004494 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004495 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004496 int line = ctxt->input->line;
4497 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004498 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004499
4500 SHRINK;
4501 GROW;
4502 /*
4503 * Accelerated common case where input don't need to be
4504 * modified before passing it to the handler.
4505 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004506 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004507 in = ctxt->input->cur;
4508 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004509get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004510 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004511 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004512 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004513 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004514 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004515 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004516 goto get_more_space;
4517 }
4518 if (*in == '<') {
4519 nbchar = in - ctxt->input->cur;
4520 if (nbchar > 0) {
4521 const xmlChar *tmp = ctxt->input->cur;
4522 ctxt->input->cur = in;
4523
Daniel Veillard34099b42004-11-04 17:34:35 +00004524 if ((ctxt->sax != NULL) &&
4525 (ctxt->sax->ignorableWhitespace !=
4526 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004527 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004528 if (ctxt->sax->ignorableWhitespace != NULL)
4529 ctxt->sax->ignorableWhitespace(ctxt->userData,
4530 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004531 } else {
4532 if (ctxt->sax->characters != NULL)
4533 ctxt->sax->characters(ctxt->userData,
4534 tmp, nbchar);
4535 if (*ctxt->space == -1)
4536 *ctxt->space = -2;
4537 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004538 } else if ((ctxt->sax != NULL) &&
4539 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004540 ctxt->sax->characters(ctxt->userData,
4541 tmp, nbchar);
4542 }
4543 }
4544 return;
4545 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004546
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004547get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004548 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004549 while (test_char_data[*in]) {
4550 in++;
4551 ccol++;
4552 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004553 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004554 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004555 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004556 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004557 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004558 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004559 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004560 }
4561 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004562 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004563 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004564 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004565 return;
4566 }
4567 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004568 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004569 goto get_more;
4570 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004571 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004572 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004573 if ((ctxt->sax != NULL) &&
4574 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004575 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004576 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004577 const xmlChar *tmp = ctxt->input->cur;
4578 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004579
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004580 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004581 if (ctxt->sax->ignorableWhitespace != NULL)
4582 ctxt->sax->ignorableWhitespace(ctxt->userData,
4583 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004584 } else {
4585 if (ctxt->sax->characters != NULL)
4586 ctxt->sax->characters(ctxt->userData,
4587 tmp, nbchar);
4588 if (*ctxt->space == -1)
4589 *ctxt->space = -2;
4590 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004591 line = ctxt->input->line;
4592 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004593 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004594 if (ctxt->sax->characters != NULL)
4595 ctxt->sax->characters(ctxt->userData,
4596 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004597 line = ctxt->input->line;
4598 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004599 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004600 /* something really bad happened in the SAX callback */
4601 if (ctxt->instate != XML_PARSER_CONTENT)
4602 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004603 }
4604 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004605 if (*in == 0xD) {
4606 in++;
4607 if (*in == 0xA) {
4608 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004609 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004610 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004611 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004612 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004613 in--;
4614 }
4615 if (*in == '<') {
4616 return;
4617 }
4618 if (*in == '&') {
4619 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004620 }
4621 SHRINK;
4622 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004623 if (ctxt->instate == XML_PARSER_EOF)
4624 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004625 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004626 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004627 nbchar = 0;
4628 }
Daniel Veillard50582112001-03-26 22:52:16 +00004629 ctxt->input->line = line;
4630 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004631 xmlParseCharDataComplex(ctxt, cdata);
4632}
4633
Daniel Veillard01c13b52002-12-10 15:19:08 +00004634/**
4635 * xmlParseCharDataComplex:
4636 * @ctxt: an XML parser context
4637 * @cdata: int indicating whether we are within a CDATA section
4638 *
4639 * parse a CharData section.this is the fallback function
4640 * of xmlParseCharData() when the parsing requires handling
4641 * of non-ASCII characters.
4642 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004643static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004644xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004645 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4646 int nbchar = 0;
4647 int cur, l;
4648 int count = 0;
4649
4650 SHRINK;
4651 GROW;
4652 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004653 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004654 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004655 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004656 if ((cur == ']') && (NXT(1) == ']') &&
4657 (NXT(2) == '>')) {
4658 if (cdata) break;
4659 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004660 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004661 }
4662 }
4663 COPY_BUF(l,buf,nbchar,cur);
4664 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004665 buf[nbchar] = 0;
4666
Owen Taylor3473f882001-02-23 17:55:21 +00004667 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004668 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004669 */
4670 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004671 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004672 if (ctxt->sax->ignorableWhitespace != NULL)
4673 ctxt->sax->ignorableWhitespace(ctxt->userData,
4674 buf, nbchar);
4675 } else {
4676 if (ctxt->sax->characters != NULL)
4677 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004678 if ((ctxt->sax->characters !=
4679 ctxt->sax->ignorableWhitespace) &&
4680 (*ctxt->space == -1))
4681 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004682 }
4683 }
4684 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004685 /* something really bad happened in the SAX callback */
4686 if (ctxt->instate != XML_PARSER_CONTENT)
4687 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004688 }
4689 count++;
4690 if (count > 50) {
4691 GROW;
4692 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004693 if (ctxt->instate == XML_PARSER_EOF)
4694 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004695 }
4696 NEXTL(l);
4697 cur = CUR_CHAR(l);
4698 }
4699 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004700 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004701 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004702 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004703 */
4704 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004705 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004706 if (ctxt->sax->ignorableWhitespace != NULL)
4707 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4708 } else {
4709 if (ctxt->sax->characters != NULL)
4710 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004711 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4712 (*ctxt->space == -1))
4713 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004714 }
4715 }
4716 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004717 if ((cur != 0) && (!IS_CHAR(cur))) {
4718 /* Generate the error and skip the offending character */
4719 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4720 "PCDATA invalid Char value %d\n",
4721 cur);
4722 NEXTL(l);
4723 }
Owen Taylor3473f882001-02-23 17:55:21 +00004724}
4725
4726/**
4727 * xmlParseExternalID:
4728 * @ctxt: an XML parser context
4729 * @publicID: a xmlChar** receiving PubidLiteral
4730 * @strict: indicate whether we should restrict parsing to only
4731 * production [75], see NOTE below
4732 *
4733 * Parse an External ID or a Public ID
4734 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004735 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004736 * 'PUBLIC' S PubidLiteral S SystemLiteral
4737 *
4738 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4739 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4740 *
4741 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4742 *
4743 * Returns the function returns SystemLiteral and in the second
4744 * case publicID receives PubidLiteral, is strict is off
4745 * it is possible to return NULL and have publicID set.
4746 */
4747
4748xmlChar *
4749xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4750 xmlChar *URI = NULL;
4751
4752 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004753
4754 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004755 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004756 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004757 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004758 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4759 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004760 }
4761 SKIP_BLANKS;
4762 URI = xmlParseSystemLiteral(ctxt);
4763 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004764 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004765 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004766 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004767 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004768 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004769 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004770 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004771 }
4772 SKIP_BLANKS;
4773 *publicID = xmlParsePubidLiteral(ctxt);
4774 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004775 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004776 }
4777 if (strict) {
4778 /*
4779 * We don't handle [83] so "S SystemLiteral" is required.
4780 */
William M. Brack76e95df2003-10-18 16:20:14 +00004781 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004782 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004783 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004784 }
4785 } else {
4786 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004787 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004788 * "S SystemLiteral" is not detected. From a purely parsing
4789 * point of view that's a nice mess.
4790 */
4791 const xmlChar *ptr;
4792 GROW;
4793
4794 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004795 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004796
William M. Brack76e95df2003-10-18 16:20:14 +00004797 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004798 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4799 }
4800 SKIP_BLANKS;
4801 URI = xmlParseSystemLiteral(ctxt);
4802 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004803 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004804 }
4805 }
4806 return(URI);
4807}
4808
4809/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004810 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004811 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004812 * @buf: the already parsed part of the buffer
4813 * @len: number of bytes filles in the buffer
4814 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004815 *
4816 * Skip an XML (SGML) comment <!-- .... -->
4817 * The spec says that "For compatibility, the string "--" (double-hyphen)
4818 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004819 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004820 *
4821 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4822 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004823static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004824xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4825 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004826 int q, ql;
4827 int r, rl;
4828 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004829 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004830 int inputid;
4831
4832 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004833
Owen Taylor3473f882001-02-23 17:55:21 +00004834 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004835 len = 0;
4836 size = XML_PARSER_BUFFER_SIZE;
4837 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4838 if (buf == NULL) {
4839 xmlErrMemory(ctxt, NULL);
4840 return;
4841 }
Owen Taylor3473f882001-02-23 17:55:21 +00004842 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004843 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004844 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004845 if (q == 0)
4846 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004847 if (!IS_CHAR(q)) {
4848 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4849 "xmlParseComment: invalid xmlChar value %d\n",
4850 q);
4851 xmlFree (buf);
4852 return;
4853 }
Owen Taylor3473f882001-02-23 17:55:21 +00004854 NEXTL(ql);
4855 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004856 if (r == 0)
4857 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004858 if (!IS_CHAR(r)) {
4859 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4860 "xmlParseComment: invalid xmlChar value %d\n",
4861 q);
4862 xmlFree (buf);
4863 return;
4864 }
Owen Taylor3473f882001-02-23 17:55:21 +00004865 NEXTL(rl);
4866 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004867 if (cur == 0)
4868 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004869 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004870 ((cur != '>') ||
4871 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004872 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004873 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004874 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004875 if ((len > XML_MAX_TEXT_LENGTH) &&
4876 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4877 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4878 "Comment too big found", NULL);
4879 xmlFree (buf);
4880 return;
4881 }
Owen Taylor3473f882001-02-23 17:55:21 +00004882 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004883 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004884 size_t new_size;
4885
4886 new_size = size * 2;
4887 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004888 if (new_buf == NULL) {
4889 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004890 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004891 return;
4892 }
William M. Bracka3215c72004-07-31 16:24:01 +00004893 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004894 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004895 }
4896 COPY_BUF(ql,buf,len,q);
4897 q = r;
4898 ql = rl;
4899 r = cur;
4900 rl = l;
4901
4902 count++;
4903 if (count > 50) {
4904 GROW;
4905 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004906 if (ctxt->instate == XML_PARSER_EOF) {
4907 xmlFree(buf);
4908 return;
4909 }
Owen Taylor3473f882001-02-23 17:55:21 +00004910 }
4911 NEXTL(l);
4912 cur = CUR_CHAR(l);
4913 if (cur == 0) {
4914 SHRINK;
4915 GROW;
4916 cur = CUR_CHAR(l);
4917 }
4918 }
4919 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004920 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004921 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004922 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004923 } else if (!IS_CHAR(cur)) {
4924 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4925 "xmlParseComment: invalid xmlChar value %d\n",
4926 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004927 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004928 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004929 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4930 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004931 }
4932 NEXT;
4933 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4934 (!ctxt->disableSAX))
4935 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004936 }
Daniel Veillardda629342007-08-01 07:49:06 +00004937 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004938 return;
4939not_terminated:
4940 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4941 "Comment not terminated\n", NULL);
4942 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004943 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004944}
Daniel Veillardda629342007-08-01 07:49:06 +00004945
Daniel Veillard4c778d82005-01-23 17:37:44 +00004946/**
4947 * xmlParseComment:
4948 * @ctxt: an XML parser context
4949 *
4950 * Skip an XML (SGML) comment <!-- .... -->
4951 * The spec says that "For compatibility, the string "--" (double-hyphen)
4952 * must not occur within comments. "
4953 *
4954 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4955 */
4956void
4957xmlParseComment(xmlParserCtxtPtr ctxt) {
4958 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004959 size_t size = XML_PARSER_BUFFER_SIZE;
4960 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004961 xmlParserInputState state;
4962 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004963 size_t nbchar = 0;
4964 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004965 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004966
4967 /*
4968 * Check that there is a comment right here.
4969 */
4970 if ((RAW != '<') || (NXT(1) != '!') ||
4971 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004972 state = ctxt->instate;
4973 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004974 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004975 SKIP(4);
4976 SHRINK;
4977 GROW;
4978
4979 /*
4980 * Accelerated common case where input don't need to be
4981 * modified before passing it to the handler.
4982 */
4983 in = ctxt->input->cur;
4984 do {
4985 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004986 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004987 ctxt->input->line++; ctxt->input->col = 1;
4988 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004989 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004990 }
4991get_more:
4992 ccol = ctxt->input->col;
4993 while (((*in > '-') && (*in <= 0x7F)) ||
4994 ((*in >= 0x20) && (*in < '-')) ||
4995 (*in == 0x09)) {
4996 in++;
4997 ccol++;
4998 }
4999 ctxt->input->col = ccol;
5000 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005001 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00005002 ctxt->input->line++; ctxt->input->col = 1;
5003 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00005004 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005005 goto get_more;
5006 }
5007 nbchar = in - ctxt->input->cur;
5008 /*
5009 * save current set of data
5010 */
5011 if (nbchar > 0) {
5012 if ((ctxt->sax != NULL) &&
5013 (ctxt->sax->comment != NULL)) {
5014 if (buf == NULL) {
5015 if ((*in == '-') && (in[1] == '-'))
5016 size = nbchar + 1;
5017 else
5018 size = XML_PARSER_BUFFER_SIZE + nbchar;
5019 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5020 if (buf == NULL) {
5021 xmlErrMemory(ctxt, NULL);
5022 ctxt->instate = state;
5023 return;
5024 }
5025 len = 0;
5026 } else if (len + nbchar + 1 >= size) {
5027 xmlChar *new_buf;
5028 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5029 new_buf = (xmlChar *) xmlRealloc(buf,
5030 size * sizeof(xmlChar));
5031 if (new_buf == NULL) {
5032 xmlFree (buf);
5033 xmlErrMemory(ctxt, NULL);
5034 ctxt->instate = state;
5035 return;
5036 }
5037 buf = new_buf;
5038 }
5039 memcpy(&buf[len], ctxt->input->cur, nbchar);
5040 len += nbchar;
5041 buf[len] = 0;
5042 }
5043 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08005044 if ((len > XML_MAX_TEXT_LENGTH) &&
5045 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5046 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5047 "Comment too big found", NULL);
5048 xmlFree (buf);
5049 return;
5050 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005051 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00005052 if (*in == 0xA) {
5053 in++;
5054 ctxt->input->line++; ctxt->input->col = 1;
5055 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005056 if (*in == 0xD) {
5057 in++;
5058 if (*in == 0xA) {
5059 ctxt->input->cur = in;
5060 in++;
5061 ctxt->input->line++; ctxt->input->col = 1;
5062 continue; /* while */
5063 }
5064 in--;
5065 }
5066 SHRINK;
5067 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005068 if (ctxt->instate == XML_PARSER_EOF) {
5069 xmlFree(buf);
5070 return;
5071 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005072 in = ctxt->input->cur;
5073 if (*in == '-') {
5074 if (in[1] == '-') {
5075 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005076 if (ctxt->input->id != inputid) {
5077 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5078 "comment doesn't start and stop in the same entity\n");
5079 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005080 SKIP(3);
5081 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5082 (!ctxt->disableSAX)) {
5083 if (buf != NULL)
5084 ctxt->sax->comment(ctxt->userData, buf);
5085 else
5086 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5087 }
5088 if (buf != NULL)
5089 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005090 if (ctxt->instate != XML_PARSER_EOF)
5091 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005092 return;
5093 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005094 if (buf != NULL) {
5095 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5096 "Double hyphen within comment: "
5097 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005098 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005099 } else
5100 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5101 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005102 in++;
5103 ctxt->input->col++;
5104 }
5105 in++;
5106 ctxt->input->col++;
5107 goto get_more;
5108 }
5109 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5110 xmlParseCommentComplex(ctxt, buf, len, size);
5111 ctxt->instate = state;
5112 return;
5113}
5114
Owen Taylor3473f882001-02-23 17:55:21 +00005115
5116/**
5117 * xmlParsePITarget:
5118 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005119 *
Owen Taylor3473f882001-02-23 17:55:21 +00005120 * parse the name of a PI
5121 *
5122 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5123 *
5124 * Returns the PITarget name or NULL
5125 */
5126
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005127const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005128xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005129 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005130
5131 name = xmlParseName(ctxt);
5132 if ((name != NULL) &&
5133 ((name[0] == 'x') || (name[0] == 'X')) &&
5134 ((name[1] == 'm') || (name[1] == 'M')) &&
5135 ((name[2] == 'l') || (name[2] == 'L'))) {
5136 int i;
5137 if ((name[0] == 'x') && (name[1] == 'm') &&
5138 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005139 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005140 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005141 return(name);
5142 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005143 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005144 return(name);
5145 }
5146 for (i = 0;;i++) {
5147 if (xmlW3CPIs[i] == NULL) break;
5148 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5149 return(name);
5150 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005151 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5152 "xmlParsePITarget: invalid name prefix 'xml'\n",
5153 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005154 }
Daniel Veillard37334572008-07-31 08:20:02 +00005155 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005156 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005157 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005158 }
Owen Taylor3473f882001-02-23 17:55:21 +00005159 return(name);
5160}
5161
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the word "catalog", an '=' sign and a value
 * quoted with either ' or ", each optionally surrounded by blanks and
 * with nothing but blanks after the closing quote.  On success the
 * quoted value is handed to xmlCatalogAddLocal() and the result stored
 * in ctxt->catalogs.  A syntax error produces an XML_WAR_CATALOG_PI
 * warning, except for a missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *pos = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* leading blanks, then the "catalog" pseudo attribute name */
    for (; IS_BLANK_CH(*pos); pos++)
        ;
    if (xmlStrncmp(pos, BAD_CAST"catalog", 7) != 0)
        goto error;
    pos += 7;

    for (; IS_BLANK_CH(*pos); pos++)
        ;
    /* a missing '=' is not reported, the PI is simply ignored */
    if (*pos != '=')
        return;
    pos++;

    for (; IS_BLANK_CH(*pos); pos++)
        ;
    quote = *pos;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    pos++;

    /* locate the matching closing quote */
    start = pos;
    for (; (*pos != 0) && (*pos != quote); pos++)
        ;
    if (*pos == 0)
        goto error;
    URL = xmlStrndup(start, pos - start);
    pos++;

    /* only blanks may follow the quoted value */
    for (; IS_BLANK_CH(*pos); pos++)
        ;
    if (*pos != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5223
Owen Taylor3473f882001-02-23 17:55:21 +00005224/**
5225 * xmlParsePI:
5226 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005227 *
Owen Taylor3473f882001-02-23 17:55:21 +00005228 * parse an XML Processing Instruction.
5229 *
5230 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5231 *
5232 * The processing is transfered to SAX once parsed.
5233 */
5234
5235void
5236xmlParsePI(xmlParserCtxtPtr ctxt) {
5237 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005238 size_t len = 0;
5239 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005240 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005241 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005242 xmlParserInputState state;
5243 int count = 0;
5244
5245 if ((RAW == '<') && (NXT(1) == '?')) {
5246 xmlParserInputPtr input = ctxt->input;
5247 state = ctxt->instate;
5248 ctxt->instate = XML_PARSER_PI;
5249 /*
5250 * this is a Processing Instruction.
5251 */
5252 SKIP(2);
5253 SHRINK;
5254
5255 /*
5256 * Parse the target name and check for special support like
5257 * namespace.
5258 */
5259 target = xmlParsePITarget(ctxt);
5260 if (target != NULL) {
5261 if ((RAW == '?') && (NXT(1) == '>')) {
5262 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005263 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005265 }
5266 SKIP(2);
5267
5268 /*
5269 * SAX: PI detected.
5270 */
5271 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5272 (ctxt->sax->processingInstruction != NULL))
5273 ctxt->sax->processingInstruction(ctxt->userData,
5274 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005275 if (ctxt->instate != XML_PARSER_EOF)
5276 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005277 return;
5278 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005279 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005280 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005281 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005282 ctxt->instate = state;
5283 return;
5284 }
5285 cur = CUR;
5286 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005287 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005289 }
5290 SKIP_BLANKS;
5291 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005292 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005293 ((cur != '?') || (NXT(1) != '>'))) {
5294 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005295 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005296 size_t new_size = size * 2;
5297 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005298 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005299 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005300 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005301 ctxt->instate = state;
5302 return;
5303 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005304 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005305 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005306 }
5307 count++;
5308 if (count > 50) {
5309 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005310 if (ctxt->instate == XML_PARSER_EOF) {
5311 xmlFree(buf);
5312 return;
5313 }
Owen Taylor3473f882001-02-23 17:55:21 +00005314 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005315 if ((len > XML_MAX_TEXT_LENGTH) &&
5316 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5317 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5318 "PI %s too big found", target);
5319 xmlFree(buf);
5320 ctxt->instate = state;
5321 return;
5322 }
Owen Taylor3473f882001-02-23 17:55:21 +00005323 }
5324 COPY_BUF(l,buf,len,cur);
5325 NEXTL(l);
5326 cur = CUR_CHAR(l);
5327 if (cur == 0) {
5328 SHRINK;
5329 GROW;
5330 cur = CUR_CHAR(l);
5331 }
5332 }
Daniel Veillard51304812012-07-19 20:34:26 +08005333 if ((len > XML_MAX_TEXT_LENGTH) &&
5334 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5335 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5336 "PI %s too big found", target);
5337 xmlFree(buf);
5338 ctxt->instate = state;
5339 return;
5340 }
Owen Taylor3473f882001-02-23 17:55:21 +00005341 buf[len] = 0;
5342 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005343 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5344 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005345 } else {
5346 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5348 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005349 }
5350 SKIP(2);
5351
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005352#ifdef LIBXML_CATALOG_ENABLED
5353 if (((state == XML_PARSER_MISC) ||
5354 (state == XML_PARSER_START)) &&
5355 (xmlStrEqual(target, XML_CATALOG_PI))) {
5356 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5357 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5358 (allow == XML_CATA_ALLOW_ALL))
5359 xmlParseCatalogPI(ctxt, buf);
5360 }
5361#endif
5362
5363
Owen Taylor3473f882001-02-23 17:55:21 +00005364 /*
5365 * SAX: PI detected.
5366 */
5367 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5368 (ctxt->sax->processingInstruction != NULL))
5369 ctxt->sax->processingInstruction(ctxt->userData,
5370 target, buf);
5371 }
5372 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005373 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005374 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005375 }
Chris Evans77404b82011-12-14 16:18:25 +08005376 if (ctxt->instate != XML_PARSER_EOF)
5377 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005378 }
5379}
5380
5381/**
5382 * xmlParseNotationDecl:
5383 * @ctxt: an XML parser context
5384 *
5385 * parse a notation declaration
5386 *
5387 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5388 *
5389 * Hence there is actually 3 choices:
5390 * 'PUBLIC' S PubidLiteral
5391 * 'PUBLIC' S PubidLiteral S SystemLiteral
5392 * and 'SYSTEM' S SystemLiteral
5393 *
5394 * See the NOTE on xmlParseExternalID().
5395 */
5396
5397void
5398xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005399 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 xmlChar *Pubid;
5401 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005402
Daniel Veillarda07050d2003-10-19 14:46:32 +00005403 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005404 xmlParserInputPtr input = ctxt->input;
5405 SHRINK;
5406 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005407 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005408 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5409 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005410 return;
5411 }
5412 SKIP_BLANKS;
5413
Daniel Veillard76d66f42001-05-16 21:05:17 +00005414 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005415 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005416 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005417 return;
5418 }
William M. Brack76e95df2003-10-18 16:20:14 +00005419 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005420 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005421 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005422 return;
5423 }
Daniel Veillard37334572008-07-31 08:20:02 +00005424 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005425 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005426 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005427 name, NULL, NULL);
5428 }
Owen Taylor3473f882001-02-23 17:55:21 +00005429 SKIP_BLANKS;
5430
5431 /*
5432 * Parse the IDs.
5433 */
5434 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5435 SKIP_BLANKS;
5436
5437 if (RAW == '>') {
5438 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005439 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5440 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005441 }
5442 NEXT;
5443 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5444 (ctxt->sax->notationDecl != NULL))
5445 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5446 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005447 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005448 }
Owen Taylor3473f882001-02-23 17:55:21 +00005449 if (Systemid != NULL) xmlFree(Systemid);
5450 if (Pubid != NULL) xmlFree(Pubid);
5451 }
5452}
5453
5454/**
5455 * xmlParseEntityDecl:
5456 * @ctxt: an XML parser context
5457 *
5458 * parse <!ENTITY declarations
5459 *
5460 * [70] EntityDecl ::= GEDecl | PEDecl
5461 *
5462 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5463 *
5464 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5465 *
5466 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5467 *
5468 * [74] PEDef ::= EntityValue | ExternalID
5469 *
5470 * [76] NDataDecl ::= S 'NDATA' S Name
5471 *
5472 * [ VC: Notation Declared ]
5473 * The Name must match the declared name of a notation.
5474 */
5475
5476void
5477xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005478 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005479 xmlChar *value = NULL;
5480 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005481 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005482 int isParameter = 0;
5483 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005484 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005485
Daniel Veillard4c778d82005-01-23 17:37:44 +00005486 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005487 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005488 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005489 SHRINK;
5490 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005491 skipped = SKIP_BLANKS;
5492 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005493 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5494 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005495 }
Owen Taylor3473f882001-02-23 17:55:21 +00005496
5497 if (RAW == '%') {
5498 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005499 skipped = SKIP_BLANKS;
5500 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005501 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005503 }
Owen Taylor3473f882001-02-23 17:55:21 +00005504 isParameter = 1;
5505 }
5506
Daniel Veillard76d66f42001-05-16 21:05:17 +00005507 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005508 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005509 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005511 return;
5512 }
Daniel Veillard37334572008-07-31 08:20:02 +00005513 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005514 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005515 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005516 name, NULL, NULL);
5517 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005518 skipped = SKIP_BLANKS;
5519 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005520 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5521 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005522 }
Owen Taylor3473f882001-02-23 17:55:21 +00005523
Daniel Veillardf5582f12002-06-11 10:08:16 +00005524 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005525 /*
5526 * handle the various case of definitions...
5527 */
5528 if (isParameter) {
5529 if ((RAW == '"') || (RAW == '\'')) {
5530 value = xmlParseEntityValue(ctxt, &orig);
5531 if (value) {
5532 if ((ctxt->sax != NULL) &&
5533 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5534 ctxt->sax->entityDecl(ctxt->userData, name,
5535 XML_INTERNAL_PARAMETER_ENTITY,
5536 NULL, NULL, value);
5537 }
5538 } else {
5539 URI = xmlParseExternalID(ctxt, &literal, 1);
5540 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005541 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005542 }
5543 if (URI) {
5544 xmlURIPtr uri;
5545
5546 uri = xmlParseURI((const char *) URI);
5547 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005548 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5549 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005550 /*
5551 * This really ought to be a well formedness error
5552 * but the XML Core WG decided otherwise c.f. issue
5553 * E26 of the XML erratas.
5554 */
Owen Taylor3473f882001-02-23 17:55:21 +00005555 } else {
5556 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005557 /*
5558 * Okay this is foolish to block those but not
5559 * invalid URIs.
5560 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005561 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005562 } else {
5563 if ((ctxt->sax != NULL) &&
5564 (!ctxt->disableSAX) &&
5565 (ctxt->sax->entityDecl != NULL))
5566 ctxt->sax->entityDecl(ctxt->userData, name,
5567 XML_EXTERNAL_PARAMETER_ENTITY,
5568 literal, URI, NULL);
5569 }
5570 xmlFreeURI(uri);
5571 }
5572 }
5573 }
5574 } else {
5575 if ((RAW == '"') || (RAW == '\'')) {
5576 value = xmlParseEntityValue(ctxt, &orig);
5577 if ((ctxt->sax != NULL) &&
5578 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5579 ctxt->sax->entityDecl(ctxt->userData, name,
5580 XML_INTERNAL_GENERAL_ENTITY,
5581 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005582 /*
5583 * For expat compatibility in SAX mode.
5584 */
5585 if ((ctxt->myDoc == NULL) ||
5586 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5587 if (ctxt->myDoc == NULL) {
5588 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005589 if (ctxt->myDoc == NULL) {
5590 xmlErrMemory(ctxt, "New Doc failed");
5591 return;
5592 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005593 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005594 }
5595 if (ctxt->myDoc->intSubset == NULL)
5596 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5597 BAD_CAST "fake", NULL, NULL);
5598
Daniel Veillard1af9a412003-08-20 22:54:39 +00005599 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5600 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005601 }
Owen Taylor3473f882001-02-23 17:55:21 +00005602 } else {
5603 URI = xmlParseExternalID(ctxt, &literal, 1);
5604 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005605 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005606 }
5607 if (URI) {
5608 xmlURIPtr uri;
5609
5610 uri = xmlParseURI((const char *)URI);
5611 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005612 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5613 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005614 /*
5615 * This really ought to be a well formedness error
5616 * but the XML Core WG decided otherwise c.f. issue
5617 * E26 of the XML erratas.
5618 */
Owen Taylor3473f882001-02-23 17:55:21 +00005619 } else {
5620 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005621 /*
5622 * Okay this is foolish to block those but not
5623 * invalid URIs.
5624 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005625 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005626 }
5627 xmlFreeURI(uri);
5628 }
5629 }
William M. Brack76e95df2003-10-18 16:20:14 +00005630 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005631 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5632 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005633 }
5634 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005635 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005636 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005637 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005638 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5639 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005640 }
5641 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005642 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005643 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5644 (ctxt->sax->unparsedEntityDecl != NULL))
5645 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5646 literal, URI, ndata);
5647 } else {
5648 if ((ctxt->sax != NULL) &&
5649 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5650 ctxt->sax->entityDecl(ctxt->userData, name,
5651 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5652 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005653 /*
5654 * For expat compatibility in SAX mode.
5655 * assuming the entity repalcement was asked for
5656 */
5657 if ((ctxt->replaceEntities != 0) &&
5658 ((ctxt->myDoc == NULL) ||
5659 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5660 if (ctxt->myDoc == NULL) {
5661 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005662 if (ctxt->myDoc == NULL) {
5663 xmlErrMemory(ctxt, "New Doc failed");
5664 return;
5665 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005666 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005667 }
5668
5669 if (ctxt->myDoc->intSubset == NULL)
5670 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5671 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005672 xmlSAX2EntityDecl(ctxt, name,
5673 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5674 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005675 }
Owen Taylor3473f882001-02-23 17:55:21 +00005676 }
5677 }
5678 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005679 if (ctxt->instate == XML_PARSER_EOF)
5680 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005681 SKIP_BLANKS;
5682 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005683 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005684 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarde3b15972015-11-20 14:59:30 +08005685 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005686 } else {
5687 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005688 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5689 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005690 }
5691 NEXT;
5692 }
5693 if (orig != NULL) {
5694 /*
5695 * Ugly mechanism to save the raw entity value.
5696 */
5697 xmlEntityPtr cur = NULL;
5698
5699 if (isParameter) {
5700 if ((ctxt->sax != NULL) &&
5701 (ctxt->sax->getParameterEntity != NULL))
5702 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5703 } else {
5704 if ((ctxt->sax != NULL) &&
5705 (ctxt->sax->getEntity != NULL))
5706 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005707 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005708 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005709 }
Owen Taylor3473f882001-02-23 17:55:21 +00005710 }
5711 if (cur != NULL) {
5712 if (cur->orig != NULL)
5713 xmlFree(orig);
5714 else
5715 cur->orig = orig;
5716 } else
5717 xmlFree(orig);
5718 }
Owen Taylor3473f882001-02-23 17:55:21 +00005719 if (value != NULL) xmlFree(value);
5720 if (URI != NULL) xmlFree(URI);
5721 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005722 }
5723}
5724
5725/**
5726 * xmlParseDefaultDecl:
5727 * @ctxt: an XML parser context
5728 * @value: Receive a possible fixed default value for the attribute
5729 *
5730 * Parse an attribute default declaration
5731 *
5732 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733 *
5734 * [ VC: Required Attribute ]
5735 * if the default declaration is the keyword #REQUIRED, then the
5736 * attribute must be specified for all elements of the type in the
5737 * attribute-list declaration.
5738 *
5739 * [ VC: Attribute Default Legal ]
5740 * The declared default value must meet the lexical constraints of
5741 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742 *
5743 * [ VC: Fixed Attribute Default ]
5744 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005745 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005746 *
5747 * [ WFC: No < in Attribute Values ]
5748 * handled in xmlParseAttValue()
5749 *
5750 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005751 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005752 */
5753
5754int
5755xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756 int val;
5757 xmlChar *ret;
5758
5759 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005760 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005761 SKIP(9);
5762 return(XML_ATTRIBUTE_REQUIRED);
5763 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005764 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005765 SKIP(8);
5766 return(XML_ATTRIBUTE_IMPLIED);
5767 }
5768 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005769 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005770 SKIP(6);
5771 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005772 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005773 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776 SKIP_BLANKS;
5777 }
5778 ret = xmlParseAttValue(ctxt);
5779 ctxt->instate = XML_PARSER_DTD;
5780 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005781 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005782 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005783 } else
5784 *value = ret;
5785 return(val);
5786}
5787
5788/**
5789 * xmlParseNotationType:
5790 * @ctxt: an XML parser context
5791 *
5792 * parse an Notation attribute type.
5793 *
5794 * Note: the leading 'NOTATION' S part has already being parsed...
5795 *
5796 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5797 *
5798 * [ VC: Notation Attributes ]
5799 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005800 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005801 *
5802 * Returns: the notation attribute tree built while parsing
5803 */
5804
5805xmlEnumerationPtr
5806xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005807 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005808 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005809
5810 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005811 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005812 return(NULL);
5813 }
5814 SHRINK;
5815 do {
5816 NEXT;
5817 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005818 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005819 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005820 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5821 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005822 xmlFreeEnumeration(ret);
5823 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005824 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005825 tmp = ret;
5826 while (tmp != NULL) {
5827 if (xmlStrEqual(name, tmp->name)) {
5828 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5829 "standalone: attribute notation value token %s duplicated\n",
5830 name, NULL);
5831 if (!xmlDictOwns(ctxt->dict, name))
5832 xmlFree((xmlChar *) name);
5833 break;
5834 }
5835 tmp = tmp->next;
5836 }
5837 if (tmp == NULL) {
5838 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005839 if (cur == NULL) {
5840 xmlFreeEnumeration(ret);
5841 return(NULL);
5842 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005843 if (last == NULL) ret = last = cur;
5844 else {
5845 last->next = cur;
5846 last = cur;
5847 }
Owen Taylor3473f882001-02-23 17:55:21 +00005848 }
5849 SKIP_BLANKS;
5850 } while (RAW == '|');
5851 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005852 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005853 xmlFreeEnumeration(ret);
5854 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005855 }
5856 NEXT;
5857 return(ret);
5858}
5859
5860/**
5861 * xmlParseEnumerationType:
5862 * @ctxt: an XML parser context
5863 *
5864 * parse an Enumeration attribute type.
5865 *
5866 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5867 *
5868 * [ VC: Enumeration ]
5869 * Values of this type must match one of the Nmtoken tokens in
5870 * the declaration
5871 *
5872 * Returns: the enumeration attribute tree built while parsing
5873 */
5874
5875xmlEnumerationPtr
5876xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5877 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005878 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005879
5880 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005881 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005882 return(NULL);
5883 }
5884 SHRINK;
5885 do {
5886 NEXT;
5887 SKIP_BLANKS;
5888 name = xmlParseNmtoken(ctxt);
5889 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005890 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 return(ret);
5892 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005893 tmp = ret;
5894 while (tmp != NULL) {
5895 if (xmlStrEqual(name, tmp->name)) {
5896 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5897 "standalone: attribute enumeration value token %s duplicated\n",
5898 name, NULL);
5899 if (!xmlDictOwns(ctxt->dict, name))
5900 xmlFree(name);
5901 break;
5902 }
5903 tmp = tmp->next;
5904 }
5905 if (tmp == NULL) {
5906 cur = xmlCreateEnumeration(name);
5907 if (!xmlDictOwns(ctxt->dict, name))
5908 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005909 if (cur == NULL) {
5910 xmlFreeEnumeration(ret);
5911 return(NULL);
5912 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005913 if (last == NULL) ret = last = cur;
5914 else {
5915 last->next = cur;
5916 last = cur;
5917 }
Owen Taylor3473f882001-02-23 17:55:21 +00005918 }
5919 SKIP_BLANKS;
5920 } while (RAW == '|');
5921 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005922 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005923 return(ret);
5924 }
5925 NEXT;
5926 return(ret);
5927}
5928
5929/**
5930 * xmlParseEnumeratedType:
5931 * @ctxt: an XML parser context
5932 * @tree: the enumeration tree built while parsing
5933 *
5934 * parse an Enumerated attribute type.
5935 *
5936 * [57] EnumeratedType ::= NotationType | Enumeration
5937 *
5938 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5939 *
5940 *
5941 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5942 */
5943
5944int
5945xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005946 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005947 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005948 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005949 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5950 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005951 return(0);
5952 }
5953 SKIP_BLANKS;
5954 *tree = xmlParseNotationType(ctxt);
5955 if (*tree == NULL) return(0);
5956 return(XML_ATTRIBUTE_NOTATION);
5957 }
5958 *tree = xmlParseEnumerationType(ctxt);
5959 if (*tree == NULL) return(0);
5960 return(XML_ATTRIBUTE_ENUMERATION);
5961}
5962
5963/**
5964 * xmlParseAttributeType:
5965 * @ctxt: an XML parser context
5966 * @tree: the enumeration tree built while parsing
5967 *
5968 * parse the Attribute list def for an element
5969 *
5970 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5971 *
5972 * [55] StringType ::= 'CDATA'
5973 *
5974 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5975 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5976 *
5977 * Validity constraints for attribute values syntax are checked in
5978 * xmlValidateAttributeValue()
5979 *
5980 * [ VC: ID ]
5981 * Values of type ID must match the Name production. A name must not
5982 * appear more than once in an XML document as a value of this type;
5983 * i.e., ID values must uniquely identify the elements which bear them.
5984 *
5985 * [ VC: One ID per Element Type ]
5986 * No element type may have more than one ID attribute specified.
5987 *
5988 * [ VC: ID Attribute Default ]
5989 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5990 *
5991 * [ VC: IDREF ]
5992 * Values of type IDREF must match the Name production, and values
5993 * of type IDREFS must match Names; each IDREF Name must match the value
5994 * of an ID attribute on some element in the XML document; i.e. IDREF
5995 * values must match the value of some ID attribute.
5996 *
5997 * [ VC: Entity Name ]
5998 * Values of type ENTITY must match the Name production, values
5999 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006000 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00006001 *
6002 * [ VC: Name Token ]
6003 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006004 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00006005 *
6006 * Returns the attribute type
6007 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006008int
Owen Taylor3473f882001-02-23 17:55:21 +00006009xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6010 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006011 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006012 SKIP(5);
6013 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006014 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006015 SKIP(6);
6016 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006017 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006018 SKIP(5);
6019 return(XML_ATTRIBUTE_IDREF);
6020 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6021 SKIP(2);
6022 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006023 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006024 SKIP(6);
6025 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006026 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006027 SKIP(8);
6028 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006029 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006030 SKIP(8);
6031 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00006032 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006033 SKIP(7);
6034 return(XML_ATTRIBUTE_NMTOKEN);
6035 }
6036 return(xmlParseEnumeratedType(ctxt, tree));
6037}
6038
6039/**
6040 * xmlParseAttributeListDecl:
6041 * @ctxt: an XML parser context
6042 *
6043 * : parse the Attribute list def for an element
6044 *
6045 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6046 *
6047 * [53] AttDef ::= S Name S AttType S DefaultDecl
6048 *
6049 */
6050void
6051xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006052 const xmlChar *elemName;
6053 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00006054 xmlEnumerationPtr tree;
6055
Daniel Veillarda07050d2003-10-19 14:46:32 +00006056 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006057 xmlParserInputPtr input = ctxt->input;
6058
6059 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006060 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006061 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006062 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006063 }
6064 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006065 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006066 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006067 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6068 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006069 return;
6070 }
6071 SKIP_BLANKS;
6072 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006073 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006074 const xmlChar *check = CUR_PTR;
6075 int type;
6076 int def;
6077 xmlChar *defaultValue = NULL;
6078
6079 GROW;
6080 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006081 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006082 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006083 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6084 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006085 break;
6086 }
6087 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006088 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006089 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006090 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006091 break;
6092 }
6093 SKIP_BLANKS;
6094
6095 type = xmlParseAttributeType(ctxt, &tree);
6096 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006097 break;
6098 }
6099
6100 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006101 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006102 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6103 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006104 if (tree != NULL)
6105 xmlFreeEnumeration(tree);
6106 break;
6107 }
6108 SKIP_BLANKS;
6109
6110 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6111 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006112 if (defaultValue != NULL)
6113 xmlFree(defaultValue);
6114 if (tree != NULL)
6115 xmlFreeEnumeration(tree);
6116 break;
6117 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006118 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6119 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006120
6121 GROW;
6122 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006123 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006124 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006125 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006126 if (defaultValue != NULL)
6127 xmlFree(defaultValue);
6128 if (tree != NULL)
6129 xmlFreeEnumeration(tree);
6130 break;
6131 }
6132 SKIP_BLANKS;
6133 }
6134 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006135 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6136 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006137 if (defaultValue != NULL)
6138 xmlFree(defaultValue);
6139 if (tree != NULL)
6140 xmlFreeEnumeration(tree);
6141 break;
6142 }
6143 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6144 (ctxt->sax->attributeDecl != NULL))
6145 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6146 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006147 else if (tree != NULL)
6148 xmlFreeEnumeration(tree);
6149
6150 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006151 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006152 (def != XML_ATTRIBUTE_REQUIRED)) {
6153 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6154 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006155 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006156 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6157 }
Owen Taylor3473f882001-02-23 17:55:21 +00006158 if (defaultValue != NULL)
6159 xmlFree(defaultValue);
6160 GROW;
6161 }
6162 if (RAW == '>') {
6163 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006164 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6165 "Attribute list declaration doesn't start and stop in the same entity\n",
6166 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006167 }
6168 NEXT;
6169 }
Owen Taylor3473f882001-02-23 17:55:21 +00006170 }
6171}
6172
6173/**
6174 * xmlParseElementMixedContentDecl:
6175 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006176 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006177 *
6178 * parse the declaration for a Mixed Element content
6179 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006180 *
Owen Taylor3473f882001-02-23 17:55:21 +00006181 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182 * '(' S? '#PCDATA' S? ')'
6183 *
6184 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185 *
6186 * [ VC: No Duplicate Types ]
6187 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006188 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006189 *
6190 * returns: the list of the xmlElementContentPtr describing the element choices
6191 */
6192xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006193xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006194 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006195 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006196
6197 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006198 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006199 SKIP(7);
6200 SKIP_BLANKS;
6201 SHRINK;
6202 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006203 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006204 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006206 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006207 }
Owen Taylor3473f882001-02-23 17:55:21 +00006208 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006209 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006210 if (ret == NULL)
6211 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006212 if (RAW == '*') {
6213 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214 NEXT;
6215 }
6216 return(ret);
6217 }
6218 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006219 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006220 if (ret == NULL) return(NULL);
6221 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006222 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006223 NEXT;
6224 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006225 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006226 if (ret == NULL) return(NULL);
6227 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006228 if (cur != NULL)
6229 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006230 cur = ret;
6231 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006232 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006233 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006234 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006235 if (n->c1 != NULL)
6236 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006237 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006238 if (n != NULL)
6239 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006240 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006241 }
6242 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006243 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006244 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006245 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006246 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006247 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006248 return(NULL);
6249 }
6250 SKIP_BLANKS;
6251 GROW;
6252 }
6253 if ((RAW == ')') && (NXT(1) == '*')) {
6254 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006255 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006256 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006257 if (cur->c2 != NULL)
6258 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006259 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006260 if (ret != NULL)
6261 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006262 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006263 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6264"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006265 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006266 }
Owen Taylor3473f882001-02-23 17:55:21 +00006267 SKIP(2);
6268 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006269 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006270 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006271 return(NULL);
6272 }
6273
6274 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006275 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006276 }
6277 return(ret);
6278}
6279
6280/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006281 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006282 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006283 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006284 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006285 *
6286 * parse the declaration for a Mixed Element content
6287 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006288 *
Owen Taylor3473f882001-02-23 17:55:21 +00006289 *
6290 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6291 *
6292 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6293 *
6294 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6295 *
6296 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6297 *
6298 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6299 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006300 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006301 * opening or closing parentheses in a choice, seq, or Mixed
6302 * construct is contained in the replacement text for a parameter
6303 * entity, both must be contained in the same replacement text. For
6304 * interoperability, if a parameter-entity reference appears in a
6305 * choice, seq, or Mixed construct, its replacement text should not
6306 * be empty, and neither the first nor last non-blank character of
6307 * the replacement text should be a connector (| or ,).
6308 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006309 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006310 * hierarchy.
6311 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006312static xmlElementContentPtr
6313xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6314 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006315 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006316 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006317 xmlChar type = 0;
6318
Daniel Veillard489f9672009-08-10 16:49:30 +02006319 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6320 (depth > 2048)) {
6321 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6322"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6323 depth);
6324 return(NULL);
6325 }
Owen Taylor3473f882001-02-23 17:55:21 +00006326 SKIP_BLANKS;
6327 GROW;
6328 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006329 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006330
Owen Taylor3473f882001-02-23 17:55:21 +00006331 /* Recurse on first child */
6332 NEXT;
6333 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006334 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6335 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006336 SKIP_BLANKS;
6337 GROW;
6338 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006339 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006340 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006341 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006342 return(NULL);
6343 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006344 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006345 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006346 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006347 return(NULL);
6348 }
Owen Taylor3473f882001-02-23 17:55:21 +00006349 GROW;
6350 if (RAW == '?') {
6351 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6352 NEXT;
6353 } else if (RAW == '*') {
6354 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6355 NEXT;
6356 } else if (RAW == '+') {
6357 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6358 NEXT;
6359 } else {
6360 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6361 }
Owen Taylor3473f882001-02-23 17:55:21 +00006362 GROW;
6363 }
6364 SKIP_BLANKS;
6365 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006366 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006367 /*
6368 * Each loop we parse one separator and one element.
6369 */
6370 if (RAW == ',') {
6371 if (type == 0) type = CUR;
6372
6373 /*
6374 * Detect "Name | Name , Name" error
6375 */
6376 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006377 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006378 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006379 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006380 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006381 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006382 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006383 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006384 return(NULL);
6385 }
6386 NEXT;
6387
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006388 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006389 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006390 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006391 xmlFreeDocElementContent(ctxt->myDoc, last);
6392 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006393 return(NULL);
6394 }
6395 if (last == NULL) {
6396 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006397 if (ret != NULL)
6398 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006399 ret = cur = op;
6400 } else {
6401 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006402 if (op != NULL)
6403 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006404 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006405 if (last != NULL)
6406 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006407 cur =op;
6408 last = NULL;
6409 }
6410 } else if (RAW == '|') {
6411 if (type == 0) type = CUR;
6412
6413 /*
6414 * Detect "Name , Name | Name" error
6415 */
6416 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006417 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006418 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006419 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006420 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006421 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006423 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006424 return(NULL);
6425 }
6426 NEXT;
6427
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006428 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006429 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006430 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006431 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006432 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006433 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006434 return(NULL);
6435 }
6436 if (last == NULL) {
6437 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006438 if (ret != NULL)
6439 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006440 ret = cur = op;
6441 } else {
6442 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006443 if (op != NULL)
6444 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006445 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006446 if (last != NULL)
6447 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006448 cur =op;
6449 last = NULL;
6450 }
6451 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006452 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006453 if ((last != NULL) && (last != ret))
6454 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006455 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006456 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006457 return(NULL);
6458 }
6459 GROW;
6460 SKIP_BLANKS;
6461 GROW;
6462 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006463 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006464 /* Recurse on second child */
6465 NEXT;
6466 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006467 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6468 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006469 SKIP_BLANKS;
6470 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006471 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006472 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006473 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006474 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006475 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006476 return(NULL);
6477 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006478 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006479 if (last == NULL) {
6480 if (ret != NULL)
6481 xmlFreeDocElementContent(ctxt->myDoc, ret);
6482 return(NULL);
6483 }
Owen Taylor3473f882001-02-23 17:55:21 +00006484 if (RAW == '?') {
6485 last->ocur = XML_ELEMENT_CONTENT_OPT;
6486 NEXT;
6487 } else if (RAW == '*') {
6488 last->ocur = XML_ELEMENT_CONTENT_MULT;
6489 NEXT;
6490 } else if (RAW == '+') {
6491 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6492 NEXT;
6493 } else {
6494 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6495 }
6496 }
6497 SKIP_BLANKS;
6498 GROW;
6499 }
6500 if ((cur != NULL) && (last != NULL)) {
6501 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006502 if (last != NULL)
6503 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006504 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006505 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006506 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6507"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006508 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006509 }
Owen Taylor3473f882001-02-23 17:55:21 +00006510 NEXT;
6511 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006512 if (ret != NULL) {
6513 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6514 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6515 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6516 else
6517 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6518 }
Owen Taylor3473f882001-02-23 17:55:21 +00006519 NEXT;
6520 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006521 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006522 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006523 cur = ret;
6524 /*
6525 * Some normalization:
6526 * (a | b* | c?)* == (a | b | c)*
6527 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006528 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006529 if ((cur->c1 != NULL) &&
6530 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6531 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6532 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6533 if ((cur->c2 != NULL) &&
6534 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6535 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6536 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6537 cur = cur->c2;
6538 }
6539 }
Owen Taylor3473f882001-02-23 17:55:21 +00006540 NEXT;
6541 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006542 if (ret != NULL) {
6543 int found = 0;
6544
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006545 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6546 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6547 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006548 else
6549 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006550 /*
6551 * Some normalization:
6552 * (a | b*)+ == (a | b)*
6553 * (a | b?)+ == (a | b)*
6554 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006555 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006556 if ((cur->c1 != NULL) &&
6557 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6558 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6559 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6560 found = 1;
6561 }
6562 if ((cur->c2 != NULL) &&
6563 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6564 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6565 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6566 found = 1;
6567 }
6568 cur = cur->c2;
6569 }
6570 if (found)
6571 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6572 }
Owen Taylor3473f882001-02-23 17:55:21 +00006573 NEXT;
6574 }
6575 return(ret);
6576}
6577
6578/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006579 * xmlParseElementChildrenContentDecl:
6580 * @ctxt: an XML parser context
6581 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006582 *
6583 * parse the declaration for a Mixed Element content
6584 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6585 *
6586 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6587 *
6588 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6589 *
6590 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6591 *
6592 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6593 *
6594 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6595 * TODO Parameter-entity replacement text must be properly nested
6596 * with parenthesized groups. That is to say, if either of the
6597 * opening or closing parentheses in a choice, seq, or Mixed
6598 * construct is contained in the replacement text for a parameter
6599 * entity, both must be contained in the same replacement text. For
6600 * interoperability, if a parameter-entity reference appears in a
6601 * choice, seq, or Mixed construct, its replacement text should not
6602 * be empty, and neither the first nor last non-blank character of
6603 * the replacement text should be a connector (| or ,).
6604 *
6605 * Returns the tree of xmlElementContentPtr describing the element
6606 * hierarchy.
6607 */
6608xmlElementContentPtr
6609xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6610 /* stub left for API/ABI compat */
6611 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6612}
6613
6614/**
Owen Taylor3473f882001-02-23 17:55:21 +00006615 * xmlParseElementContentDecl:
6616 * @ctxt: an XML parser context
6617 * @name: the name of the element being defined.
6618 * @result: the Element Content pointer will be stored here if any
6619 *
6620 * parse the declaration for an Element content either Mixed or Children,
6621 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006622 *
Owen Taylor3473f882001-02-23 17:55:21 +00006623 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6624 *
6625 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6626 */
6627
6628int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006629xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006630 xmlElementContentPtr *result) {
6631
6632 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006633 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006634 int res;
6635
6636 *result = NULL;
6637
6638 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006639 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006640 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006641 return(-1);
6642 }
6643 NEXT;
6644 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006645 if (ctxt->instate == XML_PARSER_EOF)
6646 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006647 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006648 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006649 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006650 res = XML_ELEMENT_TYPE_MIXED;
6651 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006652 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006653 res = XML_ELEMENT_TYPE_ELEMENT;
6654 }
Owen Taylor3473f882001-02-23 17:55:21 +00006655 SKIP_BLANKS;
6656 *result = tree;
6657 return(res);
6658}
6659
6660/**
6661 * xmlParseElementDecl:
6662 * @ctxt: an XML parser context
6663 *
6664 * parse an Element declaration.
6665 *
6666 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6667 *
6668 * [ VC: Unique Element Type Declaration ]
6669 * No element type may be declared more than once
6670 *
6671 * Returns the type of the element, or -1 in case of error
6672 */
6673int
6674xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006675 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006676 int ret = -1;
6677 xmlElementContentPtr content = NULL;
6678
Daniel Veillard4c778d82005-01-23 17:37:44 +00006679 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006680 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006681 xmlParserInputPtr input = ctxt->input;
6682
6683 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006684 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006685 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6686 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006687 }
6688 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006689 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006690 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006691 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6692 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006693 return(-1);
6694 }
6695 while ((RAW == 0) && (ctxt->inputNr > 1))
6696 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006697 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6699 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006700 }
6701 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006702 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006703 SKIP(5);
6704 /*
6705 * Element must always be empty.
6706 */
6707 ret = XML_ELEMENT_TYPE_EMPTY;
6708 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6709 (NXT(2) == 'Y')) {
6710 SKIP(3);
6711 /*
6712 * Element is a generic container.
6713 */
6714 ret = XML_ELEMENT_TYPE_ANY;
6715 } else if (RAW == '(') {
6716 ret = xmlParseElementContentDecl(ctxt, name, &content);
6717 } else {
6718 /*
6719 * [ WFC: PEs in Internal Subset ] error handling.
6720 */
6721 if ((RAW == '%') && (ctxt->external == 0) &&
6722 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006723 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006724 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006725 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006726 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006727 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6728 }
Owen Taylor3473f882001-02-23 17:55:21 +00006729 return(-1);
6730 }
6731
6732 SKIP_BLANKS;
6733 /*
6734 * Pop-up of finished entities.
6735 */
6736 while ((RAW == 0) && (ctxt->inputNr > 1))
6737 xmlPopInput(ctxt);
6738 SKIP_BLANKS;
6739
6740 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006741 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006742 if (content != NULL) {
6743 xmlFreeDocElementContent(ctxt->myDoc, content);
6744 }
Owen Taylor3473f882001-02-23 17:55:21 +00006745 } else {
6746 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006747 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6748 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006749 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006750
Owen Taylor3473f882001-02-23 17:55:21 +00006751 NEXT;
6752 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006753 (ctxt->sax->elementDecl != NULL)) {
6754 if (content != NULL)
6755 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006756 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6757 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006758 if ((content != NULL) && (content->parent == NULL)) {
6759 /*
6760 * this is a trick: if xmlAddElementDecl is called,
6761 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006762 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006763 * interfaces or change the API/ABI
6764 */
6765 xmlFreeDocElementContent(ctxt->myDoc, content);
6766 }
6767 } else if (content != NULL) {
6768 xmlFreeDocElementContent(ctxt->myDoc, content);
6769 }
Owen Taylor3473f882001-02-23 17:55:21 +00006770 }
Owen Taylor3473f882001-02-23 17:55:21 +00006771 }
6772 return(ret);
6773}
6774
6775/**
Owen Taylor3473f882001-02-23 17:55:21 +00006776 * xmlParseConditionalSections
6777 * @ctxt: an XML parser context
6778 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006779 * [61] conditionalSect ::= includeSect | ignoreSect
6780 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006781 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6782 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6783 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6784 */
6785
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006786static void
Owen Taylor3473f882001-02-23 17:55:21 +00006787xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006788 int id = ctxt->input->id;
6789
Owen Taylor3473f882001-02-23 17:55:21 +00006790 SKIP(3);
6791 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006792 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006793 SKIP(7);
6794 SKIP_BLANKS;
6795 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006796 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006797 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006798 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006799 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006800 if (ctxt->input->id != id) {
6801 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6802 "All markup of the conditional section is not in the same entity\n",
6803 NULL, NULL);
6804 }
Owen Taylor3473f882001-02-23 17:55:21 +00006805 NEXT;
6806 }
6807 if (xmlParserDebugEntities) {
6808 if ((ctxt->input != NULL) && (ctxt->input->filename))
6809 xmlGenericError(xmlGenericErrorContext,
6810 "%s(%d): ", ctxt->input->filename,
6811 ctxt->input->line);
6812 xmlGenericError(xmlGenericErrorContext,
6813 "Entering INCLUDE Conditional Section\n");
6814 }
6815
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006816 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6817 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006818 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006819 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006820
6821 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6822 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006823 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006824 NEXT;
6825 } else if (RAW == '%') {
6826 xmlParsePEReference(ctxt);
6827 } else
6828 xmlParseMarkupDecl(ctxt);
6829
6830 /*
6831 * Pop-up of finished entities.
6832 */
6833 while ((RAW == 0) && (ctxt->inputNr > 1))
6834 xmlPopInput(ctxt);
6835
Daniel Veillardfdc91562002-07-01 21:52:03 +00006836 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006837 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006838 break;
6839 }
6840 }
6841 if (xmlParserDebugEntities) {
6842 if ((ctxt->input != NULL) && (ctxt->input->filename))
6843 xmlGenericError(xmlGenericErrorContext,
6844 "%s(%d): ", ctxt->input->filename,
6845 ctxt->input->line);
6846 xmlGenericError(xmlGenericErrorContext,
6847 "Leaving INCLUDE Conditional Section\n");
6848 }
6849
Daniel Veillarda07050d2003-10-19 14:46:32 +00006850 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006851 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006852 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006853 int depth = 0;
6854
6855 SKIP(6);
6856 SKIP_BLANKS;
6857 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006858 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006859 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006860 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006861 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006862 if (ctxt->input->id != id) {
6863 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6864 "All markup of the conditional section is not in the same entity\n",
6865 NULL, NULL);
6866 }
Owen Taylor3473f882001-02-23 17:55:21 +00006867 NEXT;
6868 }
6869 if (xmlParserDebugEntities) {
6870 if ((ctxt->input != NULL) && (ctxt->input->filename))
6871 xmlGenericError(xmlGenericErrorContext,
6872 "%s(%d): ", ctxt->input->filename,
6873 ctxt->input->line);
6874 xmlGenericError(xmlGenericErrorContext,
6875 "Entering IGNORE Conditional Section\n");
6876 }
6877
6878 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006879 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006880 * But disable SAX event generating DTD building in the meantime
6881 */
6882 state = ctxt->disableSAX;
6883 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006884 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006885 ctxt->instate = XML_PARSER_IGNORE;
6886
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006887 while (((depth >= 0) && (RAW != 0)) &&
6888 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006889 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6890 depth++;
6891 SKIP(3);
6892 continue;
6893 }
6894 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6895 if (--depth >= 0) SKIP(3);
6896 continue;
6897 }
6898 NEXT;
6899 continue;
6900 }
6901
6902 ctxt->disableSAX = state;
6903 ctxt->instate = instate;
6904
6905 if (xmlParserDebugEntities) {
6906 if ((ctxt->input != NULL) && (ctxt->input->filename))
6907 xmlGenericError(xmlGenericErrorContext,
6908 "%s(%d): ", ctxt->input->filename,
6909 ctxt->input->line);
6910 xmlGenericError(xmlGenericErrorContext,
6911 "Leaving IGNORE Conditional Section\n");
6912 }
6913
6914 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006915 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +08006916 xmlHaltParser(ctxt);
Daniel Veillard9b851232015-02-23 11:29:20 +08006917 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006918 }
6919
6920 if (RAW == 0)
6921 SHRINK;
6922
6923 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006924 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006925 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006926 if (ctxt->input->id != id) {
6927 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6928 "All markup of the conditional section is not in the same entity\n",
6929 NULL, NULL);
6930 }
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006931 if ((ctxt-> instate != XML_PARSER_EOF) &&
Daniel Veillard41ac9042015-10-27 10:53:44 +08006932 ((ctxt->input->cur + 3) <= ctxt->input->end))
Daniel Veillardbd0526e2015-10-23 19:02:28 +08006933 SKIP(3);
Owen Taylor3473f882001-02-23 17:55:21 +00006934 }
6935}
6936
6937/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006938 * xmlParseMarkupDecl:
6939 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006940 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006941 * parse Markup declarations
6942 *
6943 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6944 * NotationDecl | PI | Comment
6945 *
6946 * [ VC: Proper Declaration/PE Nesting ]
6947 * Parameter-entity replacement text must be properly nested with
6948 * markup declarations. That is to say, if either the first character
6949 * or the last character of a markup declaration (markupdecl above) is
6950 * contained in the replacement text for a parameter-entity reference,
6951 * both must be contained in the same replacement text.
6952 *
6953 * [ WFC: PEs in Internal Subset ]
6954 * In the internal DTD subset, parameter-entity references can occur
6955 * only where markup declarations can occur, not within markup declarations.
6956 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006957 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006958 */
6959void
6960xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6961 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006962 if (CUR == '<') {
6963 if (NXT(1) == '!') {
6964 switch (NXT(2)) {
6965 case 'E':
6966 if (NXT(3) == 'L')
6967 xmlParseElementDecl(ctxt);
6968 else if (NXT(3) == 'N')
6969 xmlParseEntityDecl(ctxt);
6970 break;
6971 case 'A':
6972 xmlParseAttributeListDecl(ctxt);
6973 break;
6974 case 'N':
6975 xmlParseNotationDecl(ctxt);
6976 break;
6977 case '-':
6978 xmlParseComment(ctxt);
6979 break;
6980 default:
6981 /* there is an error but it will be detected later */
6982 break;
6983 }
6984 } else if (NXT(1) == '?') {
6985 xmlParsePI(ctxt);
6986 }
6987 }
Hugh Davenportab2b9a92015-11-03 20:40:49 +08006988
6989 /*
6990 * detect requirement to exit there and act accordingly
6991 * and avoid having instate overriden later on
6992 */
6993 if (ctxt->instate == XML_PARSER_EOF)
6994 return;
6995
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006996 /*
6997 * This is only for internal subset. On external entities,
6998 * the replacement is done before parsing stage
6999 */
7000 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
7001 xmlParsePEReference(ctxt);
7002
7003 /*
7004 * Conditional sections are allowed from entities included
7005 * by PE References in the internal subset.
7006 */
7007 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
7008 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7009 xmlParseConditionalSections(ctxt);
7010 }
7011 }
7012
7013 ctxt->instate = XML_PARSER_DTD;
7014}
7015
7016/**
7017 * xmlParseTextDecl:
7018 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00007019 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007020 * parse an XML declaration header for external entities
7021 *
7022 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007023 */
7024
7025void
7026xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7027 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007028 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007029
7030 /*
7031 * We know that '<?xml' is here.
7032 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00007033 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007034 SKIP(5);
7035 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007036 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007037 return;
7038 }
7039
William M. Brack76e95df2003-10-18 16:20:14 +00007040 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007041 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7042 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007043 }
7044 SKIP_BLANKS;
7045
7046 /*
7047 * We may have the VersionInfo here.
7048 */
7049 version = xmlParseVersionInfo(ctxt);
7050 if (version == NULL)
7051 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00007052 else {
William M. Brack76e95df2003-10-18 16:20:14 +00007053 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007054 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7055 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00007056 }
7057 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007058 ctxt->input->version = version;
7059
7060 /*
7061 * We must have the encoding declaration
7062 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007063 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007064 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7065 /*
7066 * The XML REC instructs us to stop parsing right here
7067 */
7068 return;
7069 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007070 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7071 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7072 "Missing encoding in text declaration\n");
7073 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007074
7075 SKIP_BLANKS;
7076 if ((RAW == '?') && (NXT(1) == '>')) {
7077 SKIP(2);
7078 } else if (RAW == '>') {
7079 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007080 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007081 NEXT;
7082 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007083 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007084 MOVETO_ENDTAG(CUR_PTR);
7085 NEXT;
7086 }
7087}
7088
7089/**
Owen Taylor3473f882001-02-23 17:55:21 +00007090 * xmlParseExternalSubset:
7091 * @ctxt: an XML parser context
7092 * @ExternalID: the external identifier
7093 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007094 *
Owen Taylor3473f882001-02-23 17:55:21 +00007095 * parse Markup declarations from an external subset
7096 *
7097 * [30] extSubset ::= textDecl? extSubsetDecl
7098 *
7099 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7100 */
7101void
7102xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7103 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007104 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007105 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007106
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007107 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007108 (ctxt->input->end - ctxt->input->cur >= 4)) {
7109 xmlChar start[4];
7110 xmlCharEncoding enc;
7111
7112 start[0] = RAW;
7113 start[1] = NXT(1);
7114 start[2] = NXT(2);
7115 start[3] = NXT(3);
7116 enc = xmlDetectCharEncoding(start, 4);
7117 if (enc != XML_CHAR_ENCODING_NONE)
7118 xmlSwitchEncoding(ctxt, enc);
7119 }
7120
Daniel Veillarda07050d2003-10-19 14:46:32 +00007121 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007122 xmlParseTextDecl(ctxt);
7123 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7124 /*
7125 * The XML REC instructs us to stop parsing right here
7126 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08007127 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007128 return;
7129 }
7130 }
7131 if (ctxt->myDoc == NULL) {
7132 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007133 if (ctxt->myDoc == NULL) {
7134 xmlErrMemory(ctxt, "New Doc failed");
7135 return;
7136 }
7137 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007138 }
7139 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7140 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7141
7142 ctxt->instate = XML_PARSER_DTD;
7143 ctxt->external = 1;
7144 while (((RAW == '<') && (NXT(1) == '?')) ||
7145 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007146 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007147 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007148 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007149
7150 GROW;
7151 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7152 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007153 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007154 NEXT;
7155 } else if (RAW == '%') {
7156 xmlParsePEReference(ctxt);
7157 } else
7158 xmlParseMarkupDecl(ctxt);
7159
7160 /*
7161 * Pop-up of finished entities.
7162 */
7163 while ((RAW == 0) && (ctxt->inputNr > 1))
7164 xmlPopInput(ctxt);
7165
Daniel Veillardfdc91562002-07-01 21:52:03 +00007166 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007167 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007168 break;
7169 }
7170 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007171
Owen Taylor3473f882001-02-23 17:55:21 +00007172 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007173 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007174 }
7175
7176}
7177
7178/**
7179 * xmlParseReference:
7180 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007181 *
 * Parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
7186 *
7187 * [67] Reference ::= EntityRef | CharRef
7188 */
7189void
7190xmlParseReference(xmlParserCtxtPtr ctxt) {
7191 xmlEntityPtr ent;
7192 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007193 int was_checked;
7194 xmlNodePtr list = NULL;
7195 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007196
Daniel Veillard0161e632008-08-28 15:36:32 +00007197
7198 if (RAW != '&')
7199 return;
7200
7201 /*
7202 * Simple case of a CharRef
7203 */
Owen Taylor3473f882001-02-23 17:55:21 +00007204 if (NXT(1) == '#') {
7205 int i = 0;
7206 xmlChar out[10];
7207 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007208 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007209
Daniel Veillarddc171602008-03-26 17:41:38 +00007210 if (value == 0)
7211 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007212 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7213 /*
7214 * So we are using non-UTF-8 buffers
7215 * Check that the char fit on 8bits, if not
7216 * generate a CharRef.
7217 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007218 if (value <= 0xFF) {
7219 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007220 out[1] = 0;
7221 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7222 (!ctxt->disableSAX))
7223 ctxt->sax->characters(ctxt->userData, out, 1);
7224 } else {
7225 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007226 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007227 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007228 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007229 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7230 (!ctxt->disableSAX))
7231 ctxt->sax->reference(ctxt->userData, out);
7232 }
7233 } else {
7234 /*
7235 * Just encode the value in UTF-8
7236 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007237 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007238 out[i] = 0;
7239 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7240 (!ctxt->disableSAX))
7241 ctxt->sax->characters(ctxt->userData, out, i);
7242 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007243 return;
7244 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007245
Daniel Veillard0161e632008-08-28 15:36:32 +00007246 /*
7247 * We are seeing an entity reference
7248 */
7249 ent = xmlParseEntityRef(ctxt);
7250 if (ent == NULL) return;
7251 if (!ctxt->wellFormed)
7252 return;
7253 was_checked = ent->checked;
7254
7255 /* special case of predefined entities */
7256 if ((ent->name == NULL) ||
7257 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7258 val = ent->content;
7259 if (val == NULL) return;
7260 /*
7261 * inline the entity.
7262 */
7263 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7264 (!ctxt->disableSAX))
7265 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7266 return;
7267 }
7268
7269 /*
7270 * The first reference to the entity trigger a parsing phase
7271 * where the ent->children is filled with the result from
7272 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007273 * Note: external parsed entities will not be loaded, it is not
7274 * required for a non-validating parser, unless the parsing option
7275 * of validating, or substituting entities were given. Doing so is
7276 * far more secure as the parser will only process data coming from
7277 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007278 */
Daniel Veillard72a46a52014-10-23 11:35:36 +08007279 if (((ent->checked == 0) ||
7280 ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
Daniel Veillard4629ee02012-07-23 14:15:40 +08007281 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7282 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007283 unsigned long oldnbent = ctxt->nbentities;
7284
7285 /*
7286 * This is a bit hackish but this seems the best
7287 * way to make sure both SAX and DOM entity support
7288 * behaves okay.
7289 */
7290 void *user_data;
7291 if (ctxt->userData == ctxt)
7292 user_data = NULL;
7293 else
7294 user_data = ctxt->userData;
7295
7296 /*
7297 * Check that this entity is well formed
7298 * 4.3.2: An internal general parsed entity is well-formed
7299 * if its replacement text matches the production labeled
7300 * content.
7301 */
7302 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7303 ctxt->depth++;
7304 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7305 user_data, &list);
7306 ctxt->depth--;
7307
7308 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7309 ctxt->depth++;
7310 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7311 user_data, ctxt->depth, ent->URI,
7312 ent->ExternalID, &list);
7313 ctxt->depth--;
7314 } else {
7315 ret = XML_ERR_ENTITY_PE_INTERNAL;
7316 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7317 "invalid entity type found\n", NULL);
7318 }
7319
7320 /*
7321 * Store the number of entities needing parsing for this entity
7322 * content and do checkings
7323 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007324 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7325 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7326 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007327 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007328 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007329 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007330 return;
7331 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007332 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007333 xmlFreeNodeList(list);
7334 return;
7335 }
Owen Taylor3473f882001-02-23 17:55:21 +00007336
Daniel Veillard0161e632008-08-28 15:36:32 +00007337 if ((ret == XML_ERR_OK) && (list != NULL)) {
7338 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7339 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7340 (ent->children == NULL)) {
7341 ent->children = list;
7342 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007343 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007344 * Prune it directly in the generated document
7345 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007346 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007347 if (((list->type == XML_TEXT_NODE) &&
7348 (list->next == NULL)) ||
7349 (ctxt->parseMode == XML_PARSE_READER)) {
7350 list->parent = (xmlNodePtr) ent;
7351 list = NULL;
7352 ent->owner = 1;
7353 } else {
7354 ent->owner = 0;
7355 while (list != NULL) {
7356 list->parent = (xmlNodePtr) ctxt->node;
7357 list->doc = ctxt->myDoc;
7358 if (list->next == NULL)
7359 ent->last = list;
7360 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007361 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007362 list = ent->children;
7363#ifdef LIBXML_LEGACY_ENABLED
7364 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7365 xmlAddEntityReference(ent, list, NULL);
7366#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007367 }
7368 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007369 ent->owner = 1;
7370 while (list != NULL) {
7371 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007372 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007373 if (list->next == NULL)
7374 ent->last = list;
7375 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007376 }
7377 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007378 } else {
7379 xmlFreeNodeList(list);
7380 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007381 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007382 } else if ((ret != XML_ERR_OK) &&
7383 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7384 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7385 "Entity '%s' failed to parse\n", ent->name);
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007386 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007387 } else if (list != NULL) {
7388 xmlFreeNodeList(list);
7389 list = NULL;
7390 }
7391 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007392 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007393 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007394 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007395 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007396
Daniel Veillard0161e632008-08-28 15:36:32 +00007397 /*
7398 * Now that the entity content has been gathered
7399 * provide it to the application, this can take different forms based
7400 * on the parsing modes.
7401 */
7402 if (ent->children == NULL) {
7403 /*
7404 * Probably running in SAX mode and the callbacks don't
7405 * build the entity content. So unless we already went
7406 * though parsing for first checking go though the entity
7407 * content to generate callbacks associated to the entity
7408 */
7409 if (was_checked != 0) {
7410 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007411 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007412 * This is a bit hackish but this seems the best
7413 * way to make sure both SAX and DOM entity support
7414 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007415 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007416 if (ctxt->userData == ctxt)
7417 user_data = NULL;
7418 else
7419 user_data = ctxt->userData;
7420
7421 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7422 ctxt->depth++;
7423 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7424 ent->content, user_data, NULL);
7425 ctxt->depth--;
7426 } else if (ent->etype ==
7427 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7428 ctxt->depth++;
7429 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7430 ctxt->sax, user_data, ctxt->depth,
7431 ent->URI, ent->ExternalID, NULL);
7432 ctxt->depth--;
7433 } else {
7434 ret = XML_ERR_ENTITY_PE_INTERNAL;
7435 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7436 "invalid entity type found\n", NULL);
7437 }
7438 if (ret == XML_ERR_ENTITY_LOOP) {
7439 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7440 return;
7441 }
7442 }
7443 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7444 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7445 /*
7446 * Entity reference callback comes second, it's somewhat
7447 * superfluous but a compatibility to historical behaviour
7448 */
7449 ctxt->sax->reference(ctxt->userData, ent->name);
7450 }
7451 return;
7452 }
7453
7454 /*
7455 * If we didn't get any children for the entity being built
7456 */
7457 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7458 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7459 /*
7460 * Create a node.
7461 */
7462 ctxt->sax->reference(ctxt->userData, ent->name);
7463 return;
7464 }
7465
7466 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7467 /*
7468 * There is a problem on the handling of _private for entities
7469 * (bug 155816): Should we copy the content of the field from
7470 * the entity (possibly overwriting some value set by the user
7471 * when a copy is created), should we leave it alone, or should
7472 * we try to take care of different situations? The problem
7473 * is exacerbated by the usage of this field by the xmlReader.
7474 * To fix this bug, we look at _private on the created node
7475 * and, if it's NULL, we copy in whatever was in the entity.
7476 * If it's not NULL we leave it alone. This is somewhat of a
7477 * hack - maybe we should have further tests to determine
7478 * what to do.
7479 */
7480 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7481 /*
7482 * Seems we are generating the DOM content, do
7483 * a simple tree copy for all references except the first
7484 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007485 */
7486 if (((list == NULL) && (ent->owner == 0)) ||
7487 (ctxt->parseMode == XML_PARSE_READER)) {
7488 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7489
7490 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007491 * We are copying here, make sure there is no abuse
7492 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007493 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007494 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7495 return;
7496
7497 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007498 * when operating on a reader, the entities definitions
7499 * are always owning the entities subtree.
7500 if (ctxt->parseMode == XML_PARSE_READER)
7501 ent->owner = 1;
7502 */
7503
7504 cur = ent->children;
7505 while (cur != NULL) {
7506 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7507 if (nw != NULL) {
7508 if (nw->_private == NULL)
7509 nw->_private = cur->_private;
7510 if (firstChild == NULL){
7511 firstChild = nw;
7512 }
7513 nw = xmlAddChild(ctxt->node, nw);
7514 }
7515 if (cur == ent->last) {
7516 /*
7517 * needed to detect some strange empty
7518 * node cases in the reader tests
7519 */
7520 if ((ctxt->parseMode == XML_PARSE_READER) &&
7521 (nw != NULL) &&
7522 (nw->type == XML_ELEMENT_NODE) &&
7523 (nw->children == NULL))
7524 nw->extra = 1;
7525
7526 break;
7527 }
7528 cur = cur->next;
7529 }
7530#ifdef LIBXML_LEGACY_ENABLED
7531 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7532 xmlAddEntityReference(ent, firstChild, nw);
7533#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007534 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007535 xmlNodePtr nw = NULL, cur, next, last,
7536 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007537
7538 /*
7539 * We are copying here, make sure there is no abuse
7540 */
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007541 ctxt->sizeentcopy += ent->length + 5;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007542 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7543 return;
7544
Daniel Veillard0161e632008-08-28 15:36:32 +00007545 /*
7546 * Copy the entity child list and make it the new
7547 * entity child list. The goal is to make sure any
7548 * ID or REF referenced will be the one from the
7549 * document content and not the entity copy.
7550 */
7551 cur = ent->children;
7552 ent->children = NULL;
7553 last = ent->last;
7554 ent->last = NULL;
7555 while (cur != NULL) {
7556 next = cur->next;
7557 cur->next = NULL;
7558 cur->parent = NULL;
7559 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7560 if (nw != NULL) {
7561 if (nw->_private == NULL)
7562 nw->_private = cur->_private;
7563 if (firstChild == NULL){
7564 firstChild = cur;
7565 }
7566 xmlAddChild((xmlNodePtr) ent, nw);
7567 xmlAddChild(ctxt->node, cur);
7568 }
7569 if (cur == last)
7570 break;
7571 cur = next;
7572 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007573 if (ent->owner == 0)
7574 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007575#ifdef LIBXML_LEGACY_ENABLED
7576 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7577 xmlAddEntityReference(ent, firstChild, nw);
7578#endif /* LIBXML_LEGACY_ENABLED */
7579 } else {
7580 const xmlChar *nbktext;
7581
7582 /*
7583 * the name change is to avoid coalescing of the
7584 * node with a possible previous text one which
7585 * would make ent->children a dangling pointer
7586 */
7587 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7588 -1);
7589 if (ent->children->type == XML_TEXT_NODE)
7590 ent->children->name = nbktext;
7591 if ((ent->last != ent->children) &&
7592 (ent->last->type == XML_TEXT_NODE))
7593 ent->last->name = nbktext;
7594 xmlAddChildList(ctxt->node, ent->children);
7595 }
7596
7597 /*
7598 * This is to avoid a nasty side effect, see
7599 * characters() in SAX.c
7600 */
7601 ctxt->nodemem = 0;
7602 ctxt->nodelen = 0;
7603 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007604 }
7605 }
7606}
7607
7608/**
7609 * xmlParseEntityRef:
7610 * @ctxt: an XML parser context
7611 *
7612 * parse ENTITY references declarations
7613 *
7614 * [68] EntityRef ::= '&' Name ';'
7615 *
7616 * [ WFC: Entity Declared ]
7617 * In a document without any DTD, a document with only an internal DTD
7618 * subset which contains no parameter entity references, or a document
7619 * with "standalone='yes'", the Name given in the entity reference
7620 * must match that in an entity declaration, except that well-formed
7621 * documents need not declare any of the following entities: amp, lt,
7622 * gt, apos, quot. The declaration of a parameter entity must precede
7623 * any reference to it. Similarly, the declaration of a general entity
7624 * must precede any reference to it which appears in a default value in an
7625 * attribute-list declaration. Note that if entities are declared in the
7626 * external subset or in external parameter entities, a non-validating
7627 * processor is not obligated to read and process their declarations;
7628 * for such documents, the rule that an entity must be declared is a
7629 * well-formedness constraint only if standalone='yes'.
7630 *
7631 * [ WFC: Parsed Entity ]
7632 * An entity reference must not contain the name of an unparsed entity
7633 *
7634 * Returns the xmlEntityPtr if found, or NULL otherwise.
7635 */
7636xmlEntityPtr
7637xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007638 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007639 xmlEntityPtr ent = NULL;
7640
7641 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007642 if (ctxt->instate == XML_PARSER_EOF)
7643 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007644
Daniel Veillard0161e632008-08-28 15:36:32 +00007645 if (RAW != '&')
7646 return(NULL);
7647 NEXT;
7648 name = xmlParseName(ctxt);
7649 if (name == NULL) {
7650 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7651 "xmlParseEntityRef: no name\n");
7652 return(NULL);
7653 }
7654 if (RAW != ';') {
7655 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7656 return(NULL);
7657 }
7658 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007659
Daniel Veillard0161e632008-08-28 15:36:32 +00007660 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007661 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007662 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007663 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7664 ent = xmlGetPredefinedEntity(name);
7665 if (ent != NULL)
7666 return(ent);
7667 }
Owen Taylor3473f882001-02-23 17:55:21 +00007668
Daniel Veillard0161e632008-08-28 15:36:32 +00007669 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007670 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007671 */
7672 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007673
Daniel Veillard0161e632008-08-28 15:36:32 +00007674 /*
7675 * Ask first SAX for entity resolution, otherwise try the
7676 * entities which may have stored in the parser context.
7677 */
7678 if (ctxt->sax != NULL) {
7679 if (ctxt->sax->getEntity != NULL)
7680 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007681 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007682 (ctxt->options & XML_PARSE_OLDSAX))
7683 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007684 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7685 (ctxt->userData==ctxt)) {
7686 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007687 }
7688 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007689 if (ctxt->instate == XML_PARSER_EOF)
7690 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007691 /*
7692 * [ WFC: Entity Declared ]
7693 * In a document without any DTD, a document with only an
7694 * internal DTD subset which contains no parameter entity
7695 * references, or a document with "standalone='yes'", the
7696 * Name given in the entity reference must match that in an
7697 * entity declaration, except that well-formed documents
7698 * need not declare any of the following entities: amp, lt,
7699 * gt, apos, quot.
7700 * The declaration of a parameter entity must precede any
7701 * reference to it.
7702 * Similarly, the declaration of a general entity must
7703 * precede any reference to it which appears in a default
7704 * value in an attribute-list declaration. Note that if
7705 * entities are declared in the external subset or in
7706 * external parameter entities, a non-validating processor
7707 * is not obligated to read and process their declarations;
7708 * for such documents, the rule that an entity must be
7709 * declared is a well-formedness constraint only if
7710 * standalone='yes'.
7711 */
7712 if (ent == NULL) {
7713 if ((ctxt->standalone == 1) ||
7714 ((ctxt->hasExternalSubset == 0) &&
7715 (ctxt->hasPErefs == 0))) {
7716 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7717 "Entity '%s' not defined\n", name);
7718 } else {
7719 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7720 "Entity '%s' not defined\n", name);
7721 if ((ctxt->inSubset == 0) &&
7722 (ctxt->sax != NULL) &&
7723 (ctxt->sax->reference != NULL)) {
7724 ctxt->sax->reference(ctxt->userData, name);
7725 }
7726 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007727 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007728 ctxt->valid = 0;
7729 }
7730
7731 /*
7732 * [ WFC: Parsed Entity ]
7733 * An entity reference must not contain the name of an
7734 * unparsed entity
7735 */
7736 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7737 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7738 "Entity reference to unparsed entity %s\n", name);
7739 }
7740
7741 /*
7742 * [ WFC: No External Entity References ]
7743 * Attribute values cannot contain direct or indirect
7744 * entity references to external entities.
7745 */
7746 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7747 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7748 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7749 "Attribute references external entity '%s'\n", name);
7750 }
7751 /*
7752 * [ WFC: No < in Attribute Values ]
7753 * The replacement text of any entity referred to directly or
7754 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007755 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007756 */
7757 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007758 (ent != NULL) &&
7759 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007760 if (((ent->checked & 1) || (ent->checked == 0)) &&
7761 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007762 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7763 "'<' in entity '%s' is not allowed in attributes values\n", name);
7764 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007765 }
7766
7767 /*
7768 * Internal check, no parameter entities here ...
7769 */
7770 else {
7771 switch (ent->etype) {
7772 case XML_INTERNAL_PARAMETER_ENTITY:
7773 case XML_EXTERNAL_PARAMETER_ENTITY:
7774 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7775 "Attempt to reference the parameter entity '%s'\n",
7776 name);
7777 break;
7778 default:
7779 break;
7780 }
7781 }
7782
7783 /*
7784 * [ WFC: No Recursion ]
7785 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007786 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007787 * Done somewhere else
7788 */
Owen Taylor3473f882001-02-23 17:55:21 +00007789 return(ent);
7790}
7791
7792/**
7793 * xmlParseStringEntityRef:
7794 * @ctxt: an XML parser context
7795 * @str: a pointer to an index in the string
7796 *
7797 * parse ENTITY references declarations, but this version parses it from
7798 * a string value.
7799 *
7800 * [68] EntityRef ::= '&' Name ';'
7801 *
7802 * [ WFC: Entity Declared ]
7803 * In a document without any DTD, a document with only an internal DTD
7804 * subset which contains no parameter entity references, or a document
7805 * with "standalone='yes'", the Name given in the entity reference
7806 * must match that in an entity declaration, except that well-formed
7807 * documents need not declare any of the following entities: amp, lt,
7808 * gt, apos, quot. The declaration of a parameter entity must precede
7809 * any reference to it. Similarly, the declaration of a general entity
7810 * must precede any reference to it which appears in a default value in an
7811 * attribute-list declaration. Note that if entities are declared in the
7812 * external subset or in external parameter entities, a non-validating
7813 * processor is not obligated to read and process their declarations;
7814 * for such documents, the rule that an entity must be declared is a
7815 * well-formedness constraint only if standalone='yes'.
7816 *
7817 * [ WFC: Parsed Entity ]
7818 * An entity reference must not contain the name of an unparsed entity
7819 *
7820 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7821 * is updated to the current location in the string.
7822 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007823static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007824xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7825 xmlChar *name;
7826 const xmlChar *ptr;
7827 xmlChar cur;
7828 xmlEntityPtr ent = NULL;
7829
7830 if ((str == NULL) || (*str == NULL))
7831 return(NULL);
7832 ptr = *str;
7833 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007834 if (cur != '&')
7835 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007836
Daniel Veillard0161e632008-08-28 15:36:32 +00007837 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007838 name = xmlParseStringName(ctxt, &ptr);
7839 if (name == NULL) {
7840 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7841 "xmlParseStringEntityRef: no name\n");
7842 *str = ptr;
7843 return(NULL);
7844 }
7845 if (*ptr != ';') {
7846 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007847 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007848 *str = ptr;
7849 return(NULL);
7850 }
7851 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007852
Owen Taylor3473f882001-02-23 17:55:21 +00007853
Daniel Veillard0161e632008-08-28 15:36:32 +00007854 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007855 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007856 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007857 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7858 ent = xmlGetPredefinedEntity(name);
7859 if (ent != NULL) {
7860 xmlFree(name);
7861 *str = ptr;
7862 return(ent);
7863 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007864 }
Owen Taylor3473f882001-02-23 17:55:21 +00007865
Daniel Veillard0161e632008-08-28 15:36:32 +00007866 /*
7867 * Increate the number of entity references parsed
7868 */
7869 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007870
Daniel Veillard0161e632008-08-28 15:36:32 +00007871 /*
7872 * Ask first SAX for entity resolution, otherwise try the
7873 * entities which may have stored in the parser context.
7874 */
7875 if (ctxt->sax != NULL) {
7876 if (ctxt->sax->getEntity != NULL)
7877 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007878 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7879 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007880 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7881 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007882 }
7883 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007884 if (ctxt->instate == XML_PARSER_EOF) {
7885 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007886 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007887 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007888
7889 /*
7890 * [ WFC: Entity Declared ]
7891 * In a document without any DTD, a document with only an
7892 * internal DTD subset which contains no parameter entity
7893 * references, or a document with "standalone='yes'", the
7894 * Name given in the entity reference must match that in an
7895 * entity declaration, except that well-formed documents
7896 * need not declare any of the following entities: amp, lt,
7897 * gt, apos, quot.
7898 * The declaration of a parameter entity must precede any
7899 * reference to it.
7900 * Similarly, the declaration of a general entity must
7901 * precede any reference to it which appears in a default
7902 * value in an attribute-list declaration. Note that if
7903 * entities are declared in the external subset or in
7904 * external parameter entities, a non-validating processor
7905 * is not obligated to read and process their declarations;
7906 * for such documents, the rule that an entity must be
7907 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007908 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007909 */
7910 if (ent == NULL) {
7911 if ((ctxt->standalone == 1) ||
7912 ((ctxt->hasExternalSubset == 0) &&
7913 (ctxt->hasPErefs == 0))) {
7914 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7915 "Entity '%s' not defined\n", name);
7916 } else {
7917 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7918 "Entity '%s' not defined\n",
7919 name);
7920 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08007921 xmlParserEntityCheck(ctxt, 0, ent, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00007922 /* TODO ? check regressions ctxt->valid = 0; */
7923 }
7924
7925 /*
7926 * [ WFC: Parsed Entity ]
7927 * An entity reference must not contain the name of an
7928 * unparsed entity
7929 */
7930 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7931 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7932 "Entity reference to unparsed entity %s\n", name);
7933 }
7934
7935 /*
7936 * [ WFC: No External Entity References ]
7937 * Attribute values cannot contain direct or indirect
7938 * entity references to external entities.
7939 */
7940 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7941 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7942 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7943 "Attribute references external entity '%s'\n", name);
7944 }
7945 /*
7946 * [ WFC: No < in Attribute Values ]
7947 * The replacement text of any entity referred to directly or
7948 * indirectly in an attribute value (other than "&lt;") must
7949 * not contain a <.
7950 */
7951 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7952 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007953 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007954 (xmlStrchr(ent->content, '<'))) {
7955 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7956 "'<' in entity '%s' is not allowed in attributes values\n",
7957 name);
7958 }
7959
7960 /*
7961 * Internal check, no parameter entities here ...
7962 */
7963 else {
7964 switch (ent->etype) {
7965 case XML_INTERNAL_PARAMETER_ENTITY:
7966 case XML_EXTERNAL_PARAMETER_ENTITY:
7967 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7968 "Attempt to reference the parameter entity '%s'\n",
7969 name);
7970 break;
7971 default:
7972 break;
7973 }
7974 }
7975
7976 /*
7977 * [ WFC: No Recursion ]
7978 * A parsed entity must not contain a recursive reference
7979 * to itself, either directly or indirectly.
7980 * Done somewhere else
7981 */
7982
7983 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007984 *str = ptr;
7985 return(ent);
7986}
7987
7988/**
7989 * xmlParsePEReference:
7990 * @ctxt: an XML parser context
7991 *
7992 * parse PEReference declarations
7993 * The entity content is handled directly by pushing it's content as
7994 * a new input stream.
7995 *
7996 * [69] PEReference ::= '%' Name ';'
7997 *
7998 * [ WFC: No Recursion ]
7999 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008000 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008001 *
8002 * [ WFC: Entity Declared ]
8003 * In a document without any DTD, a document with only an internal DTD
8004 * subset which contains no parameter entity references, or a document
8005 * with "standalone='yes'", ... ... The declaration of a parameter
8006 * entity must precede any reference to it...
8007 *
8008 * [ VC: Entity Declared ]
8009 * In a document with an external subset or external parameter entities
8010 * with "standalone='no'", ... ... The declaration of a parameter entity
8011 * must precede any reference to it...
8012 *
8013 * [ WFC: In DTD ]
8014 * Parameter-entity references may only appear in the DTD.
8015 * NOTE: misleading but this is handled.
8016 */
8017void
Daniel Veillard8f597c32003-10-06 08:19:27 +00008018xmlParsePEReference(xmlParserCtxtPtr ctxt)
8019{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008020 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008021 xmlEntityPtr entity = NULL;
8022 xmlParserInputPtr input;
8023
Daniel Veillard0161e632008-08-28 15:36:32 +00008024 if (RAW != '%')
8025 return;
8026 NEXT;
8027 name = xmlParseName(ctxt);
8028 if (name == NULL) {
8029 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8030 "xmlParsePEReference: no name\n");
8031 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008032 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008033 if (RAW != ';') {
8034 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8035 return;
8036 }
8037
8038 NEXT;
8039
8040 /*
8041 * Increate the number of entity references parsed
8042 */
8043 ctxt->nbentities++;
8044
8045 /*
8046 * Request the entity from SAX
8047 */
8048 if ((ctxt->sax != NULL) &&
8049 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008050 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8051 if (ctxt->instate == XML_PARSER_EOF)
8052 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00008053 if (entity == NULL) {
8054 /*
8055 * [ WFC: Entity Declared ]
8056 * In a document without any DTD, a document with only an
8057 * internal DTD subset which contains no parameter entity
8058 * references, or a document with "standalone='yes'", ...
8059 * ... The declaration of a parameter entity must precede
8060 * any reference to it...
8061 */
8062 if ((ctxt->standalone == 1) ||
8063 ((ctxt->hasExternalSubset == 0) &&
8064 (ctxt->hasPErefs == 0))) {
8065 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8066 "PEReference: %%%s; not found\n",
8067 name);
8068 } else {
8069 /*
8070 * [ VC: Entity Declared ]
8071 * In a document with an external subset or external
8072 * parameter entities with "standalone='no'", ...
8073 * ... The declaration of a parameter entity must
8074 * precede any reference to it...
8075 */
8076 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8077 "PEReference: %%%s; not found\n",
8078 name, NULL);
8079 ctxt->valid = 0;
8080 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008081 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008082 } else {
8083 /*
8084 * Internal checking in case the entity quest barfed
8085 */
8086 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8087 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8088 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8089 "Internal: %%%s; is not a parameter entity\n",
8090 name, NULL);
8091 } else if (ctxt->input->free != deallocblankswrapper) {
8092 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8093 if (xmlPushInput(ctxt, input) < 0)
8094 return;
8095 } else {
8096 /*
8097 * TODO !!!
8098 * handle the extra spaces added before and after
8099 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8100 */
8101 input = xmlNewEntityInputStream(ctxt, entity);
8102 if (xmlPushInput(ctxt, input) < 0)
8103 return;
8104 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8105 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8106 (IS_BLANK_CH(NXT(5)))) {
8107 xmlParseTextDecl(ctxt);
8108 if (ctxt->errNo ==
8109 XML_ERR_UNSUPPORTED_ENCODING) {
8110 /*
8111 * The XML REC instructs us to stop parsing
8112 * right here
8113 */
Daniel Veillarde3b15972015-11-20 14:59:30 +08008114 xmlHaltParser(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00008115 return;
8116 }
8117 }
8118 }
8119 }
8120 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008121}
8122
8123/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008124 * xmlLoadEntityContent:
8125 * @ctxt: an XML parser context
8126 * @entity: an unloaded system entity
8127 *
8128 * Load the original content of the given system entity from the
8129 * ExternalID/SystemID given. This is to be used for Included in Literal
8130 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8131 *
8132 * Returns 0 in case of success and -1 in case of failure
8133 */
8134static int
8135xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8136 xmlParserInputPtr input;
8137 xmlBufferPtr buf;
8138 int l, c;
8139 int count = 0;
8140
8141 if ((ctxt == NULL) || (entity == NULL) ||
8142 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8143 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8144 (entity->content != NULL)) {
8145 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146 "xmlLoadEntityContent parameter error");
8147 return(-1);
8148 }
8149
8150 if (xmlParserDebugEntities)
8151 xmlGenericError(xmlGenericErrorContext,
8152 "Reading %s entity content input\n", entity->name);
8153
8154 buf = xmlBufferCreate();
8155 if (buf == NULL) {
8156 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8157 "xmlLoadEntityContent parameter error");
8158 return(-1);
8159 }
8160
8161 input = xmlNewEntityInputStream(ctxt, entity);
8162 if (input == NULL) {
8163 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8164 "xmlLoadEntityContent input error");
8165 xmlBufferFree(buf);
8166 return(-1);
8167 }
8168
8169 /*
8170 * Push the entity as the current input, read char by char
8171 * saving to the buffer until the end of the entity or an error
8172 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008173 if (xmlPushInput(ctxt, input) < 0) {
8174 xmlBufferFree(buf);
8175 return(-1);
8176 }
8177
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008178 GROW;
8179 c = CUR_CHAR(l);
8180 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8181 (IS_CHAR(c))) {
8182 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008183 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008184 count = 0;
8185 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008186 if (ctxt->instate == XML_PARSER_EOF) {
8187 xmlBufferFree(buf);
8188 return(-1);
8189 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008190 }
8191 NEXTL(l);
8192 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008193 if (c == 0) {
8194 count = 0;
8195 GROW;
8196 if (ctxt->instate == XML_PARSER_EOF) {
8197 xmlBufferFree(buf);
8198 return(-1);
8199 }
8200 c = CUR_CHAR(l);
8201 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008202 }
8203
8204 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8205 xmlPopInput(ctxt);
8206 } else if (!IS_CHAR(c)) {
8207 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8208 "xmlLoadEntityContent: invalid char value %d\n",
8209 c);
8210 xmlBufferFree(buf);
8211 return(-1);
8212 }
8213 entity->content = buf->content;
8214 buf->content = NULL;
8215 xmlBufferFree(buf);
8216
8217 return(0);
8218}
8219
8220/**
Owen Taylor3473f882001-02-23 17:55:21 +00008221 * xmlParseStringPEReference:
8222 * @ctxt: an XML parser context
8223 * @str: a pointer to an index in the string
8224 *
8225 * parse PEReference declarations
8226 *
8227 * [69] PEReference ::= '%' Name ';'
8228 *
8229 * [ WFC: No Recursion ]
8230 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008231 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008232 *
8233 * [ WFC: Entity Declared ]
8234 * In a document without any DTD, a document with only an internal DTD
8235 * subset which contains no parameter entity references, or a document
8236 * with "standalone='yes'", ... ... The declaration of a parameter
8237 * entity must precede any reference to it...
8238 *
8239 * [ VC: Entity Declared ]
8240 * In a document with an external subset or external parameter entities
8241 * with "standalone='no'", ... ... The declaration of a parameter entity
8242 * must precede any reference to it...
8243 *
8244 * [ WFC: In DTD ]
8245 * Parameter-entity references may only appear in the DTD.
8246 * NOTE: misleading but this is handled.
8247 *
8248 * Returns the string of the entity content.
8249 * str is updated to the current value of the index
8250 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008251static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008252xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8253 const xmlChar *ptr;
8254 xmlChar cur;
8255 xmlChar *name;
8256 xmlEntityPtr entity = NULL;
8257
8258 if ((str == NULL) || (*str == NULL)) return(NULL);
8259 ptr = *str;
8260 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008261 if (cur != '%')
8262 return(NULL);
8263 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008264 name = xmlParseStringName(ctxt, &ptr);
8265 if (name == NULL) {
8266 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8267 "xmlParseStringPEReference: no name\n");
8268 *str = ptr;
8269 return(NULL);
8270 }
8271 cur = *ptr;
8272 if (cur != ';') {
8273 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8274 xmlFree(name);
8275 *str = ptr;
8276 return(NULL);
8277 }
8278 ptr++;
8279
8280 /*
8281 * Increate the number of entity references parsed
8282 */
8283 ctxt->nbentities++;
8284
8285 /*
8286 * Request the entity from SAX
8287 */
8288 if ((ctxt->sax != NULL) &&
8289 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008290 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8291 if (ctxt->instate == XML_PARSER_EOF) {
8292 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008293 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008294 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008295 if (entity == NULL) {
8296 /*
8297 * [ WFC: Entity Declared ]
8298 * In a document without any DTD, a document with only an
8299 * internal DTD subset which contains no parameter entity
8300 * references, or a document with "standalone='yes'", ...
8301 * ... The declaration of a parameter entity must precede
8302 * any reference to it...
8303 */
8304 if ((ctxt->standalone == 1) ||
8305 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8306 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8307 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008308 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008309 /*
8310 * [ VC: Entity Declared ]
8311 * In a document with an external subset or external
8312 * parameter entities with "standalone='no'", ...
8313 * ... The declaration of a parameter entity must
8314 * precede any reference to it...
8315 */
8316 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8317 "PEReference: %%%s; not found\n",
8318 name, NULL);
8319 ctxt->valid = 0;
8320 }
Daniel Veillardbe2a7ed2014-10-16 13:59:47 +08008321 xmlParserEntityCheck(ctxt, 0, NULL, 0);
Daniel Veillard0161e632008-08-28 15:36:32 +00008322 } else {
8323 /*
8324 * Internal checking in case the entity quest barfed
8325 */
8326 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8327 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8328 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8329 "%%%s; is not a parameter entity\n",
8330 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008331 }
8332 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008333 ctxt->hasPErefs = 1;
8334 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008335 *str = ptr;
8336 return(entity);
8337}
8338
8339/**
8340 * xmlParseDocTypeDecl:
8341 * @ctxt: an XML parser context
8342 *
8343 * parse a DOCTYPE declaration
8344 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008345 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008346 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8347 *
8348 * [ VC: Root Element Type ]
8349 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008350 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008351 */
8352
8353void
8354xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008355 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008356 xmlChar *ExternalID = NULL;
8357 xmlChar *URI = NULL;
8358
8359 /*
8360 * We know that '<!DOCTYPE' has been detected.
8361 */
8362 SKIP(9);
8363
8364 SKIP_BLANKS;
8365
8366 /*
8367 * Parse the DOCTYPE name.
8368 */
8369 name = xmlParseName(ctxt);
8370 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008371 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8372 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008373 }
8374 ctxt->intSubName = name;
8375
8376 SKIP_BLANKS;
8377
8378 /*
8379 * Check for SystemID and ExternalID
8380 */
8381 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8382
8383 if ((URI != NULL) || (ExternalID != NULL)) {
8384 ctxt->hasExternalSubset = 1;
8385 }
8386 ctxt->extSubURI = URI;
8387 ctxt->extSubSystem = ExternalID;
8388
8389 SKIP_BLANKS;
8390
8391 /*
8392 * Create and update the internal subset.
8393 */
8394 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8395 (!ctxt->disableSAX))
8396 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008397 if (ctxt->instate == XML_PARSER_EOF)
8398 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008399
8400 /*
8401 * Is there any internal subset declarations ?
8402 * they are handled separately in xmlParseInternalSubset()
8403 */
8404 if (RAW == '[')
8405 return;
8406
8407 /*
8408 * We should be at the end of the DOCTYPE declaration.
8409 */
8410 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008411 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008412 }
8413 NEXT;
8414}
8415
8416/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008417 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008418 * @ctxt: an XML parser context
8419 *
8420 * parse the internal subset declaration
8421 *
8422 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8423 */
8424
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008425static void
Owen Taylor3473f882001-02-23 17:55:21 +00008426xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8427 /*
8428 * Is there any DTD definition ?
8429 */
8430 if (RAW == '[') {
8431 ctxt->instate = XML_PARSER_DTD;
8432 NEXT;
8433 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008434 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008435 * PEReferences.
8436 * Subsequence (markupdecl | PEReference | S)*
8437 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008438 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008439 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008440 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008441
8442 SKIP_BLANKS;
8443 xmlParseMarkupDecl(ctxt);
8444 xmlParsePEReference(ctxt);
8445
8446 /*
8447 * Pop-up of finished entities.
8448 */
8449 while ((RAW == 0) && (ctxt->inputNr > 1))
8450 xmlPopInput(ctxt);
8451
8452 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008453 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008454 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008455 break;
8456 }
8457 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008458 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008459 NEXT;
8460 SKIP_BLANKS;
8461 }
8462 }
8463
8464 /*
8465 * We should be at the end of the DOCTYPE declaration.
8466 */
8467 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008468 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008469 }
8470 NEXT;
8471}
8472
Daniel Veillard81273902003-09-30 00:43:48 +00008473#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008474/**
8475 * xmlParseAttribute:
8476 * @ctxt: an XML parser context
8477 * @value: a xmlChar ** used to store the value of the attribute
8478 *
8479 * parse an attribute
8480 *
8481 * [41] Attribute ::= Name Eq AttValue
8482 *
8483 * [ WFC: No External Entity References ]
8484 * Attribute values cannot contain direct or indirect entity references
8485 * to external entities.
8486 *
8487 * [ WFC: No < in Attribute Values ]
8488 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008489 * an attribute value (other than "&lt;") must not contain a <.
8490 *
Owen Taylor3473f882001-02-23 17:55:21 +00008491 * [ VC: Attribute Value Type ]
8492 * The attribute must have been declared; the value must be of the type
8493 * declared for it.
8494 *
8495 * [25] Eq ::= S? '=' S?
8496 *
8497 * With namespace:
8498 *
8499 * [NS 11] Attribute ::= QName Eq AttValue
8500 *
8501 * Also the case QName == xmlns:??? is handled independently as a namespace
8502 * definition.
8503 *
8504 * Returns the attribute name, and the value in *value.
8505 */
8506
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008507const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008508xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008509 const xmlChar *name;
8510 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008511
8512 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008513 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008514 name = xmlParseName(ctxt);
8515 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008516 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008517 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008518 return(NULL);
8519 }
8520
8521 /*
8522 * read the value
8523 */
8524 SKIP_BLANKS;
8525 if (RAW == '=') {
8526 NEXT;
8527 SKIP_BLANKS;
8528 val = xmlParseAttValue(ctxt);
8529 ctxt->instate = XML_PARSER_CONTENT;
8530 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008531 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008532 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008533 return(NULL);
8534 }
8535
8536 /*
8537 * Check that xml:lang conforms to the specification
8538 * No more registered as an error, just generate a warning now
8539 * since this was deprecated in XML second edition
8540 */
8541 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8542 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008543 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8544 "Malformed value for xml:lang : %s\n",
8545 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008546 }
8547 }
8548
8549 /*
8550 * Check that xml:space conforms to the specification
8551 */
8552 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8553 if (xmlStrEqual(val, BAD_CAST "default"))
8554 *(ctxt->space) = 0;
8555 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8556 *(ctxt->space) = 1;
8557 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008558 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008559"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008560 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008561 }
8562 }
8563
8564 *value = val;
8565 return(name);
8566}
8567
8568/**
8569 * xmlParseStartTag:
8570 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008571 *
Owen Taylor3473f882001-02-23 17:55:21 +00008572 * parse a start of tag either for rule element or
8573 * EmptyElement. In both case we don't parse the tag closing chars.
8574 *
8575 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8576 *
8577 * [ WFC: Unique Att Spec ]
8578 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008579 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008580 *
8581 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8582 *
8583 * [ WFC: Unique Att Spec ]
8584 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008585 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008586 *
8587 * With namespace:
8588 *
8589 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8590 *
8591 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8592 *
8593 * Returns the element name parsed
8594 */
8595
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008596const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008597xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008598 const xmlChar *name;
8599 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008600 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008601 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008602 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008603 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008604 int i;
8605
8606 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008607 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008608
8609 name = xmlParseName(ctxt);
8610 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008611 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008612 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008613 return(NULL);
8614 }
8615
8616 /*
8617 * Now parse the attributes, it ends up with the ending
8618 *
8619 * (S Attribute)* S?
8620 */
8621 SKIP_BLANKS;
8622 GROW;
8623
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008624 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008625 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008626 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008627 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008628 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008629
8630 attname = xmlParseAttribute(ctxt, &attvalue);
8631 if ((attname != NULL) && (attvalue != NULL)) {
8632 /*
8633 * [ WFC: Unique Att Spec ]
8634 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008635 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008636 */
8637 for (i = 0; i < nbatts;i += 2) {
8638 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008639 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008640 xmlFree(attvalue);
8641 goto failed;
8642 }
8643 }
Owen Taylor3473f882001-02-23 17:55:21 +00008644 /*
8645 * Add the pair to atts
8646 */
8647 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008648 maxatts = 22; /* allow for 10 attrs by default */
8649 atts = (const xmlChar **)
8650 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008651 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008652 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008653 if (attvalue != NULL)
8654 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008655 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008656 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008657 ctxt->atts = atts;
8658 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008659 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008660 const xmlChar **n;
8661
Owen Taylor3473f882001-02-23 17:55:21 +00008662 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008663 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008664 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008665 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008666 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008667 if (attvalue != NULL)
8668 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008669 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008670 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008671 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008672 ctxt->atts = atts;
8673 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008674 }
8675 atts[nbatts++] = attname;
8676 atts[nbatts++] = attvalue;
8677 atts[nbatts] = NULL;
8678 atts[nbatts + 1] = NULL;
8679 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008680 if (attvalue != NULL)
8681 xmlFree(attvalue);
8682 }
8683
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008684failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008685
Daniel Veillard3772de32002-12-17 10:31:45 +00008686 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008687 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8688 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008689 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008690 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8691 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008692 }
8693 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008694 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8695 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008696 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8697 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008698 break;
8699 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008700 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008701 GROW;
8702 }
8703
8704 /*
8705 * SAX: Start of Element !
8706 */
8707 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008708 (!ctxt->disableSAX)) {
8709 if (nbatts > 0)
8710 ctxt->sax->startElement(ctxt->userData, name, atts);
8711 else
8712 ctxt->sax->startElement(ctxt->userData, name, NULL);
8713 }
Owen Taylor3473f882001-02-23 17:55:21 +00008714
8715 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008716 /* Free only the content strings */
8717 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008718 if (atts[i] != NULL)
8719 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008720 }
8721 return(name);
8722}
8723
8724/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008725 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008726 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008727 * @line: line of the start tag
8728 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008729 *
8730 * parse an end of tag
8731 *
8732 * [42] ETag ::= '</' Name S? '>'
8733 *
8734 * With namespace
8735 *
8736 * [NS 9] ETag ::= '</' QName S? '>'
8737 */
8738
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008739static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008740xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008741 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008742
8743 GROW;
8744 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008745 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008746 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008747 return;
8748 }
8749 SKIP(2);
8750
Daniel Veillard46de64e2002-05-29 08:21:33 +00008751 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008752
8753 /*
8754 * We should definitely be at the ending "S? '>'" part
8755 */
8756 GROW;
8757 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008758 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008759 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008760 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008761 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008762
8763 /*
8764 * [ WFC: Element Type Match ]
8765 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008766 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008767 *
8768 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008769 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008770 if (name == NULL) name = BAD_CAST "unparseable";
8771 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008772 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008773 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008774 }
8775
8776 /*
8777 * SAX: End of Tag
8778 */
8779 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8780 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008781 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008782
Daniel Veillarde57ec792003-09-10 10:50:59 +00008783 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008784 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008785 return;
8786}
8787
8788/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008789 * xmlParseEndTag:
8790 * @ctxt: an XML parser context
8791 *
8792 * parse an end of tag
8793 *
8794 * [42] ETag ::= '</' Name S? '>'
8795 *
8796 * With namespace
8797 *
8798 * [NS 9] ETag ::= '</' QName S? '>'
8799 */
8800
8801void
8802xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008803 xmlParseEndTag1(ctxt, 0);
8804}
Daniel Veillard81273902003-09-30 00:43:48 +00008805#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008806
8807/************************************************************************
8808 * *
8809 * SAX 2 specific operations *
8810 * *
8811 ************************************************************************/
8812
Daniel Veillard0fb18932003-09-07 09:14:37 +00008813/*
8814 * xmlGetNamespace:
8815 * @ctxt: an XML parser context
8816 * @prefix: the prefix to lookup
8817 *
8818 * Lookup the namespace name for the @prefix (which ca be NULL)
8819 * The prefix must come from the @ctxt->dict dictionnary
8820 *
8821 * Returns the namespace name or NULL if not bound
8822 */
8823static const xmlChar *
8824xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8825 int i;
8826
Daniel Veillarde57ec792003-09-10 10:50:59 +00008827 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008828 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008829 if (ctxt->nsTab[i] == prefix) {
8830 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8831 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008833 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 return(NULL);
8835}
8836
8837/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008838 * xmlParseQName:
8839 * @ctxt: an XML parser context
8840 * @prefix: pointer to store the prefix part
8841 *
8842 * parse an XML Namespace QName
8843 *
8844 * [6] QName ::= (Prefix ':')? LocalPart
8845 * [7] Prefix ::= NCName
8846 * [8] LocalPart ::= NCName
8847 *
8848 * Returns the Name parsed or NULL
8849 */
8850
8851static const xmlChar *
8852xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8853 const xmlChar *l, *p;
8854
8855 GROW;
8856
8857 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008858 if (l == NULL) {
8859 if (CUR == ':') {
8860 l = xmlParseName(ctxt);
8861 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008862 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008863 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008864 *prefix = NULL;
8865 return(l);
8866 }
8867 }
8868 return(NULL);
8869 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008870 if (CUR == ':') {
8871 NEXT;
8872 p = l;
8873 l = xmlParseNCName(ctxt);
8874 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008875 xmlChar *tmp;
8876
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008877 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8878 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008879 l = xmlParseNmtoken(ctxt);
8880 if (l == NULL)
8881 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8882 else {
8883 tmp = xmlBuildQName(l, p, NULL, 0);
8884 xmlFree((char *)l);
8885 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008886 p = xmlDictLookup(ctxt->dict, tmp, -1);
8887 if (tmp != NULL) xmlFree(tmp);
8888 *prefix = NULL;
8889 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008890 }
8891 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008892 xmlChar *tmp;
8893
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008894 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008896 NEXT;
8897 tmp = (xmlChar *) xmlParseName(ctxt);
8898 if (tmp != NULL) {
8899 tmp = xmlBuildQName(tmp, l, NULL, 0);
8900 l = xmlDictLookup(ctxt->dict, tmp, -1);
8901 if (tmp != NULL) xmlFree(tmp);
8902 *prefix = p;
8903 return(l);
8904 }
8905 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8906 l = xmlDictLookup(ctxt->dict, tmp, -1);
8907 if (tmp != NULL) xmlFree(tmp);
8908 *prefix = p;
8909 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008910 }
8911 *prefix = p;
8912 } else
8913 *prefix = NULL;
8914 return(l);
8915}
8916
8917/**
8918 * xmlParseQNameAndCompare:
8919 * @ctxt: an XML parser context
8920 * @name: the localname
8921 * @prefix: the prefix, if any.
8922 *
8923 * parse an XML name and compares for match
8924 * (specialized for endtag parsing)
8925 *
8926 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8927 * and the name for mismatch
8928 */
8929
8930static const xmlChar *
8931xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8932 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008933 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 const xmlChar *in;
8935 const xmlChar *ret;
8936 const xmlChar *prefix2;
8937
8938 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8939
8940 GROW;
8941 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008942
Daniel Veillard0fb18932003-09-07 09:14:37 +00008943 cmp = prefix;
8944 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008945 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008946 ++cmp;
8947 }
8948 if ((*cmp == 0) && (*in == ':')) {
8949 in++;
8950 cmp = name;
8951 while (*in != 0 && *in == *cmp) {
8952 ++in;
8953 ++cmp;
8954 }
William M. Brack76e95df2003-10-18 16:20:14 +00008955 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008956 /* success */
8957 ctxt->input->cur = in;
8958 return((const xmlChar*) 1);
8959 }
8960 }
8961 /*
8962 * all strings coms from the dictionary, equality can be done directly
8963 */
8964 ret = xmlParseQName (ctxt, &prefix2);
8965 if ((ret == name) && (prefix == prefix2))
8966 return((const xmlChar*) 1);
8967 return ret;
8968}
8969
8970/**
8971 * xmlParseAttValueInternal:
8972 * @ctxt: an XML parser context
8973 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008974 * @alloc: whether the attribute was reallocated as a new string
8975 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008976 *
8977 * parse a value for an attribute.
8978 * NOTE: if no normalization is needed, the routine will return pointers
8979 * directly from the data buffer.
8980 *
8981 * 3.3.3 Attribute-Value Normalization:
8982 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008983 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008984 * - a character reference is processed by appending the referenced
8985 * character to the attribute value
8986 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008987 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008988 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8989 * appending #x20 to the normalized value, except that only a single
8990 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008991 * parsed entity or the literal entity value of an internal parsed entity
8992 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008993 * If the declared value is not CDATA, then the XML processor must further
8994 * process the normalized attribute value by discarding any leading and
8995 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008996 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008997 * All attributes for which no declaration has been read should be treated
8998 * by a non-validating parser as if declared CDATA.
8999 *
9000 * Returns the AttValue parsed or NULL. The value has to be freed by the
9001 * caller if it was copied, this can be detected by val[*len] == 0.
9002 */
9003
9004static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009005xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9006 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009007{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009008 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009009 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009010 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08009011 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009012
9013 GROW;
9014 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08009015 line = ctxt->input->line;
9016 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009017 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009018 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009019 return (NULL);
9020 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009021 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009022
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009023 /*
9024 * try to handle in this routine the most common case where no
9025 * allocation of a new string is required and where content is
9026 * pure ASCII.
9027 */
9028 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009029 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009030 end = ctxt->input->end;
9031 start = in;
9032 if (in >= end) {
9033 const xmlChar *oldbase = ctxt->input->base;
9034 GROW;
9035 if (oldbase != ctxt->input->base) {
9036 long delta = ctxt->input->base - oldbase;
9037 start = start + delta;
9038 in = in + delta;
9039 }
9040 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009041 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009042 if (normalize) {
9043 /*
9044 * Skip any leading spaces
9045 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009046 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009047 ((*in == 0x20) || (*in == 0x9) ||
9048 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009049 if (*in == 0xA) {
9050 line++; col = 1;
9051 } else {
9052 col++;
9053 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009054 in++;
9055 start = in;
9056 if (in >= end) {
9057 const xmlChar *oldbase = ctxt->input->base;
9058 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009059 if (ctxt->instate == XML_PARSER_EOF)
9060 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009061 if (oldbase != ctxt->input->base) {
9062 long delta = ctxt->input->base - oldbase;
9063 start = start + delta;
9064 in = in + delta;
9065 }
9066 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009067 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9068 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9069 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009070 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009071 return(NULL);
9072 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009073 }
9074 }
9075 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9076 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009077 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009078 if ((*in++ == 0x20) && (*in == 0x20)) break;
9079 if (in >= end) {
9080 const xmlChar *oldbase = ctxt->input->base;
9081 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009082 if (ctxt->instate == XML_PARSER_EOF)
9083 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009084 if (oldbase != ctxt->input->base) {
9085 long delta = ctxt->input->base - oldbase;
9086 start = start + delta;
9087 in = in + delta;
9088 }
9089 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009090 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9091 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9092 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009093 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009094 return(NULL);
9095 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009096 }
9097 }
9098 last = in;
9099 /*
9100 * skip the trailing blanks
9101 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009102 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009103 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009104 ((*in == 0x20) || (*in == 0x9) ||
9105 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009106 if (*in == 0xA) {
9107 line++, col = 1;
9108 } else {
9109 col++;
9110 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009111 in++;
9112 if (in >= end) {
9113 const xmlChar *oldbase = ctxt->input->base;
9114 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009115 if (ctxt->instate == XML_PARSER_EOF)
9116 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009117 if (oldbase != ctxt->input->base) {
9118 long delta = ctxt->input->base - oldbase;
9119 start = start + delta;
9120 in = in + delta;
9121 last = last + delta;
9122 }
9123 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009124 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9125 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9126 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009127 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009128 return(NULL);
9129 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009130 }
9131 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009132 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9133 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9134 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009135 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009136 return(NULL);
9137 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009138 if (*in != limit) goto need_complex;
9139 } else {
9140 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9141 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9142 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009143 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009144 if (in >= end) {
9145 const xmlChar *oldbase = ctxt->input->base;
9146 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009147 if (ctxt->instate == XML_PARSER_EOF)
9148 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009149 if (oldbase != ctxt->input->base) {
9150 long delta = ctxt->input->base - oldbase;
9151 start = start + delta;
9152 in = in + delta;
9153 }
9154 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009155 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9156 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9157 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009158 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009159 return(NULL);
9160 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009161 }
9162 }
9163 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009164 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9165 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9166 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009167 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009168 return(NULL);
9169 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009170 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009171 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009172 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009173 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009174 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009175 *len = last - start;
9176 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009177 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009178 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009179 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009180 }
9181 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009182 ctxt->input->line = line;
9183 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009184 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009185 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009186need_complex:
9187 if (alloc) *alloc = 1;
9188 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009189}
9190
9191/**
9192 * xmlParseAttribute2:
9193 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009194 * @pref: the element prefix
9195 * @elem: the element name
9196 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009197 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009198 * @len: an int * to save the length of the attribute
9199 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009200 *
9201 * parse an attribute in the new SAX2 framework.
9202 *
9203 * Returns the attribute name, and the value in *value, .
9204 */
9205
9206static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009207xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009208 const xmlChar * pref, const xmlChar * elem,
9209 const xmlChar ** prefix, xmlChar ** value,
9210 int *len, int *alloc)
9211{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009212 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009213 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009214 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009215
9216 *value = NULL;
9217 GROW;
9218 name = xmlParseQName(ctxt, prefix);
9219 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009220 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9221 "error parsing attribute name\n");
9222 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009223 }
9224
9225 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009226 * get the type if needed
9227 */
9228 if (ctxt->attsSpecial != NULL) {
9229 int type;
9230
9231 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009232 pref, elem, *prefix, name);
9233 if (type != 0)
9234 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009235 }
9236
9237 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009238 * read the value
9239 */
9240 SKIP_BLANKS;
9241 if (RAW == '=') {
9242 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009243 SKIP_BLANKS;
9244 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9245 if (normalize) {
9246 /*
9247 * Sometimes a second normalisation pass for spaces is needed
9248 * but that only happens if charrefs or entities refernces
9249 * have been used in the attribute value, i.e. the attribute
9250 * value have been extracted in an allocated string already.
9251 */
9252 if (*alloc) {
9253 const xmlChar *val2;
9254
9255 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009256 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009257 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009258 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009259 }
9260 }
9261 }
9262 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009263 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009264 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9265 "Specification mandate value for attribute %s\n",
9266 name);
9267 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009268 }
9269
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009270 if (*prefix == ctxt->str_xml) {
9271 /*
9272 * Check that xml:lang conforms to the specification
9273 * No more registered as an error, just generate a warning now
9274 * since this was deprecated in XML second edition
9275 */
9276 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9277 internal_val = xmlStrndup(val, *len);
9278 if (!xmlCheckLanguageID(internal_val)) {
9279 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9280 "Malformed value for xml:lang : %s\n",
9281 internal_val, NULL);
9282 }
9283 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009284
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009285 /*
9286 * Check that xml:space conforms to the specification
9287 */
9288 if (xmlStrEqual(name, BAD_CAST "space")) {
9289 internal_val = xmlStrndup(val, *len);
9290 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9291 *(ctxt->space) = 0;
9292 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9293 *(ctxt->space) = 1;
9294 else {
9295 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9296 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9297 internal_val, NULL);
9298 }
9299 }
9300 if (internal_val) {
9301 xmlFree(internal_val);
9302 }
9303 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009304
9305 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009306 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009307}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009308/**
9309 * xmlParseStartTag2:
9310 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009311 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009312 * parse a start of tag either for rule element or
9313 * EmptyElement. In both case we don't parse the tag closing chars.
9314 * This routine is called when running SAX2 parsing
9315 *
9316 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9317 *
9318 * [ WFC: Unique Att Spec ]
9319 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009320 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009321 *
9322 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9323 *
9324 * [ WFC: Unique Att Spec ]
9325 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009326 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009327 *
9328 * With namespace:
9329 *
9330 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9331 *
9332 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9333 *
9334 * Returns the element name parsed
9335 */
9336
9337static const xmlChar *
9338xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009339 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009340 const xmlChar *localname;
9341 const xmlChar *prefix;
9342 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009343 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009344 const xmlChar *nsname;
9345 xmlChar *attvalue;
9346 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009347 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009348 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009349 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009350 const xmlChar *base;
9351 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009352 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009353
9354 if (RAW != '<') return(NULL);
9355 NEXT1;
9356
9357 /*
9358 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9359 * point since the attribute values may be stored as pointers to
9360 * the buffer and calling SHRINK would destroy them !
9361 * The Shrinking is only possible once the full set of attribute
9362 * callbacks have been done.
9363 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009364reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009365 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009366 base = ctxt->input->base;
9367 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009368 oldline = ctxt->input->line;
9369 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009370 nbatts = 0;
9371 nratts = 0;
9372 nbdef = 0;
9373 nbNs = 0;
9374 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009375 /* Forget any namespaces added during an earlier parse of this element. */
9376 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009377
9378 localname = xmlParseQName(ctxt, &prefix);
9379 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009380 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9381 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009382 return(NULL);
9383 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009384 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009385
9386 /*
9387 * Now parse the attributes, it ends up with the ending
9388 *
9389 * (S Attribute)* S?
9390 */
9391 SKIP_BLANKS;
9392 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009393 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009394
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009395 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009396 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009397 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009398 const xmlChar *q = CUR_PTR;
9399 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009400 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009401
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009402 attname = xmlParseAttribute2(ctxt, prefix, localname,
9403 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009404 if (ctxt->input->base != base) {
9405 if ((attvalue != NULL) && (alloc != 0))
9406 xmlFree(attvalue);
9407 attvalue = NULL;
9408 goto base_changed;
9409 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009410 if ((attname != NULL) && (attvalue != NULL)) {
9411 if (len < 0) len = xmlStrlen(attvalue);
9412 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009413 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9414 xmlURIPtr uri;
9415
Daniel Veillardc836ba62014-07-14 16:39:50 +08009416 if (URL == NULL) {
9417 xmlErrMemory(ctxt, "dictionary allocation failure");
9418 if ((attvalue != NULL) && (alloc != 0))
9419 xmlFree(attvalue);
9420 return(NULL);
9421 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009422 if (*URL != 0) {
9423 uri = xmlParseURI((const char *) URL);
9424 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009425 xmlNsErr(ctxt, XML_WAR_NS_URI,
9426 "xmlns: '%s' is not a valid URI\n",
9427 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009428 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009429 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009430 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9431 "xmlns: URI %s is not absolute\n",
9432 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009433 }
9434 xmlFreeURI(uri);
9435 }
Daniel Veillard37334572008-07-31 08:20:02 +00009436 if (URL == ctxt->str_xml_ns) {
9437 if (attname != ctxt->str_xml) {
9438 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9439 "xml namespace URI cannot be the default namespace\n",
9440 NULL, NULL, NULL);
9441 }
9442 goto skip_default_ns;
9443 }
9444 if ((len == 29) &&
9445 (xmlStrEqual(URL,
9446 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9447 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9448 "reuse of the xmlns namespace name is forbidden\n",
9449 NULL, NULL, NULL);
9450 goto skip_default_ns;
9451 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009452 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009453 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009454 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009455 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009456 for (j = 1;j <= nbNs;j++)
9457 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9458 break;
9459 if (j <= nbNs)
9460 xmlErrAttributeDup(ctxt, NULL, attname);
9461 else
9462 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009463skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009464 if (alloc != 0) xmlFree(attvalue);
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009465 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9466 break;
9467 if (!IS_BLANK_CH(RAW)) {
9468 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9469 "attributes construct error\n");
9470 break;
9471 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009472 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009473 continue;
9474 }
9475 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009476 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477 xmlURIPtr uri;
9478
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009479 if (attname == ctxt->str_xml) {
9480 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009481 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482 "xml namespace prefix mapped to wrong URI\n",
9483 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009484 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009485 /*
9486 * Do not keep a namespace definition node
9487 */
Daniel Veillard37334572008-07-31 08:20:02 +00009488 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009489 }
Daniel Veillard37334572008-07-31 08:20:02 +00009490 if (URL == ctxt->str_xml_ns) {
9491 if (attname != ctxt->str_xml) {
9492 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493 "xml namespace URI mapped to wrong prefix\n",
9494 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009495 }
Daniel Veillard37334572008-07-31 08:20:02 +00009496 goto skip_ns;
9497 }
9498 if (attname == ctxt->str_xmlns) {
9499 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500 "redefinition of the xmlns prefix is forbidden\n",
9501 NULL, NULL, NULL);
9502 goto skip_ns;
9503 }
9504 if ((len == 29) &&
9505 (xmlStrEqual(URL,
9506 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508 "reuse of the xmlns namespace name is forbidden\n",
9509 NULL, NULL, NULL);
9510 goto skip_ns;
9511 }
9512 if ((URL == NULL) || (URL[0] == 0)) {
9513 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514 "xmlns:%s: Empty XML namespace is not allowed\n",
9515 attname, NULL, NULL);
9516 goto skip_ns;
9517 } else {
9518 uri = xmlParseURI((const char *) URL);
9519 if (uri == NULL) {
9520 xmlNsErr(ctxt, XML_WAR_NS_URI,
9521 "xmlns:%s: '%s' is not a valid URI\n",
9522 attname, URL, NULL);
9523 } else {
9524 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526 "xmlns:%s: URI %s is not absolute\n",
9527 attname, URL, NULL);
9528 }
9529 xmlFreeURI(uri);
9530 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009531 }
9532
Daniel Veillard0fb18932003-09-07 09:14:37 +00009533 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009534 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009535 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009536 for (j = 1;j <= nbNs;j++)
9537 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538 break;
9539 if (j <= nbNs)
9540 xmlErrAttributeDup(ctxt, aprefix, attname);
9541 else
9542 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009543skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009544 if (alloc != 0) xmlFree(attvalue);
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009545 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9546 break;
9547 if (!IS_BLANK_CH(RAW)) {
9548 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9549 "attributes construct error\n");
9550 break;
9551 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009552 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009553 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009554 continue;
9555 }
9556
9557 /*
9558 * Add the pair to atts
9559 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009560 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9561 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009562 if (attvalue[len] == 0)
9563 xmlFree(attvalue);
9564 goto failed;
9565 }
9566 maxatts = ctxt->maxatts;
9567 atts = ctxt->atts;
9568 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009569 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009570 atts[nbatts++] = attname;
9571 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009572 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009573 atts[nbatts++] = attvalue;
9574 attvalue += len;
9575 atts[nbatts++] = attvalue;
9576 /*
9577 * tag if some deallocation is needed
9578 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009579 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009580 } else {
9581 if ((attvalue != NULL) && (attvalue[len] == 0))
9582 xmlFree(attvalue);
9583 }
9584
Daniel Veillard37334572008-07-31 08:20:02 +00009585failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009586
9587 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009588 if (ctxt->instate == XML_PARSER_EOF)
9589 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009590 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009591 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9592 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009593 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009594 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9595 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009596 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009597 }
9598 SKIP_BLANKS;
9599 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9600 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009601 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009602 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009603 break;
9604 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009605 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009606 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009607 }
9608
Daniel Veillard0fb18932003-09-07 09:14:37 +00009609 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009610 * The attributes defaulting
9611 */
9612 if (ctxt->attsDefault != NULL) {
9613 xmlDefAttrsPtr defaults;
9614
9615 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9616 if (defaults != NULL) {
9617 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009618 attname = defaults->values[5 * i];
9619 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009620
9621 /*
9622 * special work for namespaces defaulted defs
9623 */
9624 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9625 /*
9626 * check that it's not a defined namespace
9627 */
9628 for (j = 1;j <= nbNs;j++)
9629 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9630 break;
9631 if (j <= nbNs) continue;
9632
9633 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009634 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009635 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009636 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009637 nbNs++;
9638 }
9639 } else if (aprefix == ctxt->str_xmlns) {
9640 /*
9641 * check that it's not a defined namespace
9642 */
9643 for (j = 1;j <= nbNs;j++)
9644 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9645 break;
9646 if (j <= nbNs) continue;
9647
9648 nsname = xmlGetNamespace(ctxt, attname);
9649 if (nsname != defaults->values[2]) {
9650 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009651 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009652 nbNs++;
9653 }
9654 } else {
9655 /*
9656 * check that it's not a defined attribute
9657 */
9658 for (j = 0;j < nbatts;j+=5) {
9659 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9660 break;
9661 }
9662 if (j < nbatts) continue;
9663
9664 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9665 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009666 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009667 }
9668 maxatts = ctxt->maxatts;
9669 atts = ctxt->atts;
9670 }
9671 atts[nbatts++] = attname;
9672 atts[nbatts++] = aprefix;
9673 if (aprefix == NULL)
9674 atts[nbatts++] = NULL;
9675 else
9676 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009677 atts[nbatts++] = defaults->values[5 * i + 2];
9678 atts[nbatts++] = defaults->values[5 * i + 3];
9679 if ((ctxt->standalone == 1) &&
9680 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009681 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009682 "standalone: attribute %s on %s defaulted from external subset\n",
9683 attname, localname);
9684 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009685 nbdef++;
9686 }
9687 }
9688 }
9689 }
9690
Daniel Veillarde70c8772003-11-25 07:21:18 +00009691 /*
9692 * The attributes checkings
9693 */
9694 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009695 /*
9696 * The default namespace does not apply to attribute names.
9697 */
9698 if (atts[i + 1] != NULL) {
9699 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9700 if (nsname == NULL) {
9701 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9702 "Namespace prefix %s for %s on %s is not defined\n",
9703 atts[i + 1], atts[i], localname);
9704 }
9705 atts[i + 2] = nsname;
9706 } else
9707 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009708 /*
9709 * [ WFC: Unique Att Spec ]
9710 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009711 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009712 * As extended by the Namespace in XML REC.
9713 */
9714 for (j = 0; j < i;j += 5) {
9715 if (atts[i] == atts[j]) {
9716 if (atts[i+1] == atts[j+1]) {
9717 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9718 break;
9719 }
9720 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9721 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9722 "Namespaced Attribute %s in '%s' redefined\n",
9723 atts[i], nsname, NULL);
9724 break;
9725 }
9726 }
9727 }
9728 }
9729
Daniel Veillarde57ec792003-09-10 10:50:59 +00009730 nsname = xmlGetNamespace(ctxt, prefix);
9731 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009732 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9733 "Namespace prefix %s on %s is not defined\n",
9734 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009735 }
9736 *pref = prefix;
9737 *URI = nsname;
9738
9739 /*
9740 * SAX: Start of Element !
9741 */
9742 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9743 (!ctxt->disableSAX)) {
9744 if (nbNs > 0)
9745 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9746 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9747 nbatts / 5, nbdef, atts);
9748 else
9749 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9750 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9751 }
9752
9753 /*
9754 * Free up attribute allocated strings if needed
9755 */
9756 if (attval != 0) {
9757 for (i = 3,j = 0; j < nratts;i += 5,j++)
9758 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9759 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009760 }
9761
9762 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009763
9764base_changed:
9765 /*
9766 * the attribute strings are valid iif the base didn't changed
9767 */
9768 if (attval != 0) {
9769 for (i = 3,j = 0; j < nratts;i += 5,j++)
9770 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9771 xmlFree((xmlChar *) atts[i]);
9772 }
9773 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009774 ctxt->input->line = oldline;
9775 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009776 if (ctxt->wellFormed == 1) {
9777 goto reparse;
9778 }
9779 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009780}
9781
9782/**
9783 * xmlParseEndTag2:
9784 * @ctxt: an XML parser context
9785 * @line: line of the start tag
9786 * @nsNr: number of namespaces on the start tag
9787 *
9788 * parse an end of tag
9789 *
9790 * [42] ETag ::= '</' Name S? '>'
9791 *
9792 * With namespace
9793 *
9794 * [NS 9] ETag ::= '</' QName S? '>'
9795 */
9796
9797static void
9798xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009799 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009800 const xmlChar *name;
9801
9802 GROW;
9803 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009804 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009805 return;
9806 }
9807 SKIP(2);
9808
William M. Brack13dfa872004-09-18 04:52:08 +00009809 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009810 if (ctxt->input->cur[tlen] == '>') {
9811 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009812 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009813 goto done;
9814 }
9815 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009816 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009817 name = (xmlChar*)1;
9818 } else {
9819 if (prefix == NULL)
9820 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9821 else
9822 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9823 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009824
9825 /*
9826 * We should definitely be at the ending "S? '>'" part
9827 */
9828 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009829 if (ctxt->instate == XML_PARSER_EOF)
9830 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009831 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009832 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009833 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009834 } else
9835 NEXT1;
9836
9837 /*
9838 * [ WFC: Element Type Match ]
9839 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009840 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009841 *
9842 */
9843 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009844 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009845 if ((line == 0) && (ctxt->node != NULL))
9846 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009847 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009848 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009849 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009850 }
9851
9852 /*
9853 * SAX: End of Tag
9854 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009855done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009856 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9857 (!ctxt->disableSAX))
9858 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9859
Daniel Veillard0fb18932003-09-07 09:14:37 +00009860 spacePop(ctxt);
9861 if (nsNr != 0)
9862 nsPop(ctxt, nsNr);
9863 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009864}
9865
9866/**
Owen Taylor3473f882001-02-23 17:55:21 +00009867 * xmlParseCDSect:
9868 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009869 *
Owen Taylor3473f882001-02-23 17:55:21 +00009870 * Parse escaped pure raw content.
9871 *
9872 * [18] CDSect ::= CDStart CData CDEnd
9873 *
9874 * [19] CDStart ::= '<![CDATA['
9875 *
9876 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9877 *
9878 * [21] CDEnd ::= ']]>'
9879 */
9880void
9881xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9882 xmlChar *buf = NULL;
9883 int len = 0;
9884 int size = XML_PARSER_BUFFER_SIZE;
9885 int r, rl;
9886 int s, sl;
9887 int cur, l;
9888 int count = 0;
9889
Daniel Veillard8f597c32003-10-06 08:19:27 +00009890 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009891 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009892 SKIP(9);
9893 } else
9894 return;
9895
9896 ctxt->instate = XML_PARSER_CDATA_SECTION;
9897 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009898 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009899 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009900 ctxt->instate = XML_PARSER_CONTENT;
9901 return;
9902 }
9903 NEXTL(rl);
9904 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009905 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009906 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009907 ctxt->instate = XML_PARSER_CONTENT;
9908 return;
9909 }
9910 NEXTL(sl);
9911 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009912 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009913 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009914 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009915 return;
9916 }
William M. Brack871611b2003-10-18 04:53:14 +00009917 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009918 ((r != ']') || (s != ']') || (cur != '>'))) {
9919 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009920 xmlChar *tmp;
9921
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009922 if ((size > XML_MAX_TEXT_LENGTH) &&
9923 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9924 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9925 "CData section too big found", NULL);
9926 xmlFree (buf);
9927 return;
9928 }
9929 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009930 if (tmp == NULL) {
9931 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009932 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009933 return;
9934 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009935 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009936 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009937 }
9938 COPY_BUF(rl,buf,len,r);
9939 r = s;
9940 rl = sl;
9941 s = cur;
9942 sl = l;
9943 count++;
9944 if (count > 50) {
9945 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009946 if (ctxt->instate == XML_PARSER_EOF) {
9947 xmlFree(buf);
9948 return;
9949 }
Owen Taylor3473f882001-02-23 17:55:21 +00009950 count = 0;
9951 }
9952 NEXTL(l);
9953 cur = CUR_CHAR(l);
9954 }
9955 buf[len] = 0;
9956 ctxt->instate = XML_PARSER_CONTENT;
9957 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009958 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009959 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009960 xmlFree(buf);
9961 return;
9962 }
9963 NEXTL(l);
9964
9965 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009966 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009967 */
9968 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9969 if (ctxt->sax->cdataBlock != NULL)
9970 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009971 else if (ctxt->sax->characters != NULL)
9972 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009973 }
9974 xmlFree(buf);
9975}
9976
9977/**
9978 * xmlParseContent:
9979 * @ctxt: an XML parser context
9980 *
9981 * Parse a content:
9982 *
9983 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9984 */
9985
9986void
9987xmlParseContent(xmlParserCtxtPtr ctxt) {
9988 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009989 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009990 ((RAW != '<') || (NXT(1) != '/')) &&
9991 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009992 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009993 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009994 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009995
9996 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009997 * First case : a Processing Instruction.
9998 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009999 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010000 xmlParsePI(ctxt);
10001 }
10002
10003 /*
10004 * Second case : a CDSection
10005 */
Daniel Veillard8f597c32003-10-06 08:19:27 +000010006 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010007 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010008 xmlParseCDSect(ctxt);
10009 }
10010
10011 /*
10012 * Third case : a comment
10013 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010014 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010015 (NXT(2) == '-') && (NXT(3) == '-')) {
10016 xmlParseComment(ctxt);
10017 ctxt->instate = XML_PARSER_CONTENT;
10018 }
10019
10020 /*
10021 * Fourth case : a sub-element.
10022 */
Daniel Veillard21a0f912001-02-25 19:54:14 +000010023 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +000010024 xmlParseElement(ctxt);
10025 }
10026
10027 /*
10028 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010029 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +000010030 */
10031
Daniel Veillard21a0f912001-02-25 19:54:14 +000010032 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +000010033 xmlParseReference(ctxt);
10034 }
10035
10036 /*
10037 * Last case, text. Note that References are handled directly.
10038 */
10039 else {
10040 xmlParseCharData(ctxt, 0);
10041 }
10042
10043 GROW;
10044 /*
10045 * Pop-up of finished entities.
10046 */
Daniel Veillard561b7f82002-03-20 21:55:57 +000010047 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +000010048 xmlPopInput(ctxt);
10049 SHRINK;
10050
Daniel Veillardfdc91562002-07-01 21:52:03 +000010051 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010052 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10053 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080010054 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010055 break;
10056 }
10057 }
10058}
10059
10060/**
10061 * xmlParseElement:
10062 * @ctxt: an XML parser context
10063 *
10064 * parse an XML element, this is highly recursive
10065 *
10066 * [39] element ::= EmptyElemTag | STag content ETag
10067 *
10068 * [ WFC: Element Type Match ]
10069 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010070 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +000010071 *
Owen Taylor3473f882001-02-23 17:55:21 +000010072 */
10073
10074void
10075xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010076 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010077 const xmlChar *prefix = NULL;
10078 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010079 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010080 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010081 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010082 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010083
Daniel Veillard8915c152008-08-26 13:05:34 +000010084 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10085 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10086 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10087 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10088 xmlParserMaxDepth);
Daniel Veillarde3b15972015-11-20 14:59:30 +080010089 xmlHaltParser(ctxt);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010090 return;
10091 }
10092
Owen Taylor3473f882001-02-23 17:55:21 +000010093 /* Capture start position */
10094 if (ctxt->record_info) {
10095 node_info.begin_pos = ctxt->input->consumed +
10096 (CUR_PTR - ctxt->input->base);
10097 node_info.begin_line = ctxt->input->line;
10098 }
10099
10100 if (ctxt->spaceNr == 0)
10101 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010102 else if (*ctxt->space == -2)
10103 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010104 else
10105 spacePush(ctxt, *ctxt->space);
10106
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010107 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010108#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010109 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010110#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010111 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010112#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010113 else
10114 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010115#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010116 if (ctxt->instate == XML_PARSER_EOF)
10117 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010118 if (name == NULL) {
10119 spacePop(ctxt);
10120 return;
10121 }
10122 namePush(ctxt, name);
10123 ret = ctxt->node;
10124
Daniel Veillard4432df22003-09-28 18:58:27 +000010125#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010126 /*
10127 * [ VC: Root Element Type ]
10128 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010129 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010130 */
10131 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10132 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10133 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010134#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010135
10136 /*
10137 * Check for an Empty Element.
10138 */
10139 if ((RAW == '/') && (NXT(1) == '>')) {
10140 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010141 if (ctxt->sax2) {
10142 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10143 (!ctxt->disableSAX))
10144 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010145#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010146 } else {
10147 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10148 (!ctxt->disableSAX))
10149 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010150#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010151 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010152 namePop(ctxt);
10153 spacePop(ctxt);
10154 if (nsNr != ctxt->nsNr)
10155 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010156 if ( ret != NULL && ctxt->record_info ) {
10157 node_info.end_pos = ctxt->input->consumed +
10158 (CUR_PTR - ctxt->input->base);
10159 node_info.end_line = ctxt->input->line;
10160 node_info.node = ret;
10161 xmlParserAddNodeInfo(ctxt, &node_info);
10162 }
10163 return;
10164 }
10165 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010166 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010167 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010168 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10169 "Couldn't find end of Start Tag %s line %d\n",
10170 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010171
10172 /*
10173 * end of parsing of this node.
10174 */
10175 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010176 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010177 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010178 if (nsNr != ctxt->nsNr)
10179 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010180
10181 /*
10182 * Capture end position and add node
10183 */
10184 if ( ret != NULL && ctxt->record_info ) {
10185 node_info.end_pos = ctxt->input->consumed +
10186 (CUR_PTR - ctxt->input->base);
10187 node_info.end_line = ctxt->input->line;
10188 node_info.node = ret;
10189 xmlParserAddNodeInfo(ctxt, &node_info);
10190 }
10191 return;
10192 }
10193
10194 /*
10195 * Parse the content of the element:
10196 */
10197 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010198 if (ctxt->instate == XML_PARSER_EOF)
10199 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010200 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010201 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010202 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010203 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010204
10205 /*
10206 * end of parsing of this node.
10207 */
10208 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010209 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010210 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010211 if (nsNr != ctxt->nsNr)
10212 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010213 return;
10214 }
10215
10216 /*
10217 * parse the end of tag: '</' should be here.
10218 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010219 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010220 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010221 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010222 }
10223#ifdef LIBXML_SAX1_ENABLED
10224 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010225 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010226#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010227
10228 /*
10229 * Capture end position and add node
10230 */
10231 if ( ret != NULL && ctxt->record_info ) {
10232 node_info.end_pos = ctxt->input->consumed +
10233 (CUR_PTR - ctxt->input->base);
10234 node_info.end_line = ctxt->input->line;
10235 node_info.node = ret;
10236 xmlParserAddNodeInfo(ctxt, &node_info);
10237 }
10238}
10239
10240/**
10241 * xmlParseVersionNum:
10242 * @ctxt: an XML parser context
10243 *
10244 * parse the XML version value.
10245 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010246 * [26] VersionNum ::= '1.' [0-9]+
10247 *
10248 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010249 *
10250 * Returns the string giving the XML version number, or NULL
10251 */
10252xmlChar *
10253xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10254 xmlChar *buf = NULL;
10255 int len = 0;
10256 int size = 10;
10257 xmlChar cur;
10258
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010259 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010260 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010261 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010262 return(NULL);
10263 }
10264 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010265 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010266 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010267 return(NULL);
10268 }
10269 buf[len++] = cur;
10270 NEXT;
10271 cur=CUR;
10272 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010273 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010274 return(NULL);
10275 }
10276 buf[len++] = cur;
10277 NEXT;
10278 cur=CUR;
10279 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010280 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010281 xmlChar *tmp;
10282
Owen Taylor3473f882001-02-23 17:55:21 +000010283 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010284 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10285 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010286 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010287 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010288 return(NULL);
10289 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010290 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010291 }
10292 buf[len++] = cur;
10293 NEXT;
10294 cur=CUR;
10295 }
10296 buf[len] = 0;
10297 return(buf);
10298}
10299
10300/**
10301 * xmlParseVersionInfo:
10302 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010303 *
Owen Taylor3473f882001-02-23 17:55:21 +000010304 * parse the XML version.
10305 *
10306 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010307 *
Owen Taylor3473f882001-02-23 17:55:21 +000010308 * [25] Eq ::= S? '=' S?
10309 *
10310 * Returns the version string, e.g. "1.0"
10311 */
10312
10313xmlChar *
10314xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10315 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010316
Daniel Veillarda07050d2003-10-19 14:46:32 +000010317 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010318 SKIP(7);
10319 SKIP_BLANKS;
10320 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010321 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010322 return(NULL);
10323 }
10324 NEXT;
10325 SKIP_BLANKS;
10326 if (RAW == '"') {
10327 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010328 version = xmlParseVersionNum(ctxt);
10329 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010330 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010331 } else
10332 NEXT;
10333 } else if (RAW == '\''){
10334 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010335 version = xmlParseVersionNum(ctxt);
10336 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010337 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010338 } else
10339 NEXT;
10340 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010341 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010342 }
10343 }
10344 return(version);
10345}
10346
10347/**
10348 * xmlParseEncName:
10349 * @ctxt: an XML parser context
10350 *
10351 * parse the XML encoding name
10352 *
10353 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10354 *
10355 * Returns the encoding name value or NULL
10356 */
10357xmlChar *
10358xmlParseEncName(xmlParserCtxtPtr ctxt) {
10359 xmlChar *buf = NULL;
10360 int len = 0;
10361 int size = 10;
10362 xmlChar cur;
10363
10364 cur = CUR;
10365 if (((cur >= 'a') && (cur <= 'z')) ||
10366 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010367 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010368 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010369 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010370 return(NULL);
10371 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010372
Owen Taylor3473f882001-02-23 17:55:21 +000010373 buf[len++] = cur;
10374 NEXT;
10375 cur = CUR;
10376 while (((cur >= 'a') && (cur <= 'z')) ||
10377 ((cur >= 'A') && (cur <= 'Z')) ||
10378 ((cur >= '0') && (cur <= '9')) ||
10379 (cur == '.') || (cur == '_') ||
10380 (cur == '-')) {
10381 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010382 xmlChar *tmp;
10383
Owen Taylor3473f882001-02-23 17:55:21 +000010384 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010385 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10386 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010387 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010388 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010389 return(NULL);
10390 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010391 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010392 }
10393 buf[len++] = cur;
10394 NEXT;
10395 cur = CUR;
10396 if (cur == 0) {
10397 SHRINK;
10398 GROW;
10399 cur = CUR;
10400 }
10401 }
10402 buf[len] = 0;
10403 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010404 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010405 }
10406 return(buf);
10407}
10408
10409/**
10410 * xmlParseEncodingDecl:
10411 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010412 *
Owen Taylor3473f882001-02-23 17:55:21 +000010413 * parse the XML encoding declaration
10414 *
10415 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10416 *
10417 * this setups the conversion filters.
10418 *
10419 * Returns the encoding value or NULL
10420 */
10421
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010422const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010423xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10424 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010425
10426 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010427 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010428 SKIP(8);
10429 SKIP_BLANKS;
10430 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010431 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010432 return(NULL);
10433 }
10434 NEXT;
10435 SKIP_BLANKS;
10436 if (RAW == '"') {
10437 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010438 encoding = xmlParseEncName(ctxt);
10439 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010440 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010441 xmlFree((xmlChar *) encoding);
10442 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010443 } else
10444 NEXT;
10445 } else if (RAW == '\''){
10446 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010447 encoding = xmlParseEncName(ctxt);
10448 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010449 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Daniel Veillard9aa37582015-06-29 09:08:25 +080010450 xmlFree((xmlChar *) encoding);
10451 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010452 } else
10453 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010454 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010455 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010456 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010457
10458 /*
10459 * Non standard parsing, allowing the user to ignore encoding
10460 */
Bart De Schuymer500c54e2014-10-16 12:17:20 +080010461 if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10462 xmlFree((xmlChar *) encoding);
10463 return(NULL);
10464 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010465
Daniel Veillard6b621b82003-08-11 15:03:34 +000010466 /*
10467 * UTF-16 encoding stwich has already taken place at this stage,
10468 * more over the little-endian/big-endian selection is already done
10469 */
10470 if ((encoding != NULL) &&
10471 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10472 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010473 /*
10474 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010475 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010476 * document is apparently UTF-8 compatible, then raise an
10477 * encoding mismatch fatal error
10478 */
10479 if ((ctxt->encoding == NULL) &&
10480 (ctxt->input->buf != NULL) &&
10481 (ctxt->input->buf->encoder == NULL)) {
10482 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10483 "Document labelled UTF-16 but has UTF-8 content\n");
10484 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010485 if (ctxt->encoding != NULL)
10486 xmlFree((xmlChar *) ctxt->encoding);
10487 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010488 }
10489 /*
10490 * UTF-8 encoding is handled natively
10491 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010492 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010493 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10494 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010495 if (ctxt->encoding != NULL)
10496 xmlFree((xmlChar *) ctxt->encoding);
10497 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010498 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010499 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010500 xmlCharEncodingHandlerPtr handler;
10501
10502 if (ctxt->input->encoding != NULL)
10503 xmlFree((xmlChar *) ctxt->input->encoding);
10504 ctxt->input->encoding = encoding;
10505
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010506 handler = xmlFindCharEncodingHandler((const char *) encoding);
10507 if (handler != NULL) {
Daniel Veillard709a9522015-06-29 16:10:26 +080010508 if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10509 /* failed to convert */
10510 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10511 return(NULL);
10512 }
Owen Taylor3473f882001-02-23 17:55:21 +000010513 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010514 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010515 "Unsupported encoding %s\n", encoding);
10516 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010517 }
10518 }
10519 }
10520 return(encoding);
10521}
10522
10523/**
10524 * xmlParseSDDecl:
10525 * @ctxt: an XML parser context
10526 *
10527 * parse the XML standalone declaration
10528 *
10529 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010530 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010531 *
10532 * [ VC: Standalone Document Declaration ]
10533 * TODO The standalone document declaration must have the value "no"
10534 * if any external markup declarations contain declarations of:
10535 * - attributes with default values, if elements to which these
10536 * attributes apply appear in the document without specifications
10537 * of values for these attributes, or
10538 * - entities (other than amp, lt, gt, apos, quot), if references
10539 * to those entities appear in the document, or
10540 * - attributes with values subject to normalization, where the
10541 * attribute appears in the document with a value which will change
10542 * as a result of normalization, or
10543 * - element types with element content, if white space occurs directly
10544 * within any instance of those types.
10545 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010546 * Returns:
10547 * 1 if standalone="yes"
10548 * 0 if standalone="no"
10549 * -2 if standalone attribute is missing or invalid
10550 * (A standalone value of -2 means that the XML declaration was found,
10551 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010552 */
10553
10554int
10555xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010556 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010557
10558 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010559 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010560 SKIP(10);
10561 SKIP_BLANKS;
10562 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010563 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010564 return(standalone);
10565 }
10566 NEXT;
10567 SKIP_BLANKS;
10568 if (RAW == '\''){
10569 NEXT;
10570 if ((RAW == 'n') && (NXT(1) == 'o')) {
10571 standalone = 0;
10572 SKIP(2);
10573 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10574 (NXT(2) == 's')) {
10575 standalone = 1;
10576 SKIP(3);
10577 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010578 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010579 }
10580 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010581 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010582 } else
10583 NEXT;
10584 } else if (RAW == '"'){
10585 NEXT;
10586 if ((RAW == 'n') && (NXT(1) == 'o')) {
10587 standalone = 0;
10588 SKIP(2);
10589 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10590 (NXT(2) == 's')) {
10591 standalone = 1;
10592 SKIP(3);
10593 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010594 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010595 }
10596 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010597 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010598 } else
10599 NEXT;
10600 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010601 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010602 }
10603 }
10604 return(standalone);
10605}
10606
10607/**
10608 * xmlParseXMLDecl:
10609 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010610 *
Owen Taylor3473f882001-02-23 17:55:21 +000010611 * parse an XML declaration header
10612 *
10613 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10614 */
10615
10616void
10617xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10618 xmlChar *version;
10619
10620 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010621 * This value for standalone indicates that the document has an
10622 * XML declaration but it does not have a standalone attribute.
10623 * It will be overwritten later if a standalone attribute is found.
10624 */
10625 ctxt->input->standalone = -2;
10626
10627 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010628 * We know that '<?xml' is here.
10629 */
10630 SKIP(5);
10631
William M. Brack76e95df2003-10-18 16:20:14 +000010632 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010633 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10634 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010635 }
10636 SKIP_BLANKS;
10637
10638 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010639 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010640 */
10641 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010642 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010643 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010644 } else {
10645 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10646 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010647 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010648 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010649 if (ctxt->options & XML_PARSE_OLD10) {
10650 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10651 "Unsupported version '%s'\n",
10652 version);
10653 } else {
10654 if ((version[0] == '1') && ((version[1] == '.'))) {
10655 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10656 "Unsupported version '%s'\n",
10657 version, NULL);
10658 } else {
10659 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10660 "Unsupported version '%s'\n",
10661 version);
10662 }
10663 }
Daniel Veillard19840942001-11-29 16:11:38 +000010664 }
10665 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010666 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010667 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010668 }
Owen Taylor3473f882001-02-23 17:55:21 +000010669
10670 /*
10671 * We may have the encoding declaration
10672 */
William M. Brack76e95df2003-10-18 16:20:14 +000010673 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010674 if ((RAW == '?') && (NXT(1) == '>')) {
10675 SKIP(2);
10676 return;
10677 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010678 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010679 }
10680 xmlParseEncodingDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010681 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10682 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010683 /*
10684 * The XML REC instructs us to stop parsing right here
10685 */
10686 return;
10687 }
10688
10689 /*
10690 * We may have the standalone status.
10691 */
William M. Brack76e95df2003-10-18 16:20:14 +000010692 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010693 if ((RAW == '?') && (NXT(1) == '>')) {
10694 SKIP(2);
10695 return;
10696 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010697 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010698 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010699
10700 /*
10701 * We can grow the input buffer freely at that point
10702 */
10703 GROW;
10704
Owen Taylor3473f882001-02-23 17:55:21 +000010705 SKIP_BLANKS;
10706 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10707
10708 SKIP_BLANKS;
10709 if ((RAW == '?') && (NXT(1) == '>')) {
10710 SKIP(2);
10711 } else if (RAW == '>') {
10712 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010713 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010714 NEXT;
10715 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010716 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010717 MOVETO_ENDTAG(CUR_PTR);
10718 NEXT;
10719 }
10720}
10721
10722/**
10723 * xmlParseMisc:
10724 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010725 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010726 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010727 *
10728 * [27] Misc ::= Comment | PI | S
10729 */
10730
10731void
10732xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010733 while ((ctxt->instate != XML_PARSER_EOF) &&
10734 (((RAW == '<') && (NXT(1) == '?')) ||
10735 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10736 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010737 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010738 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010739 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010740 NEXT;
10741 } else
10742 xmlParseComment(ctxt);
10743 }
10744}
10745
10746/**
10747 * xmlParseDocument:
10748 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010749 *
Owen Taylor3473f882001-02-23 17:55:21 +000010750 * parse an XML document (and build a tree if using the standard SAX
10751 * interface).
10752 *
10753 * [1] document ::= prolog element Misc*
10754 *
10755 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10756 *
10757 * Returns 0, -1 in case of error. the parser context is augmented
10758 * as a result of the parsing.
10759 */
10760
10761int
10762xmlParseDocument(xmlParserCtxtPtr ctxt) {
10763 xmlChar start[4];
10764 xmlCharEncoding enc;
10765
10766 xmlInitParser();
10767
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010768 if ((ctxt == NULL) || (ctxt->input == NULL))
10769 return(-1);
10770
Owen Taylor3473f882001-02-23 17:55:21 +000010771 GROW;
10772
10773 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010774 * SAX: detecting the level.
10775 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010776 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010777
10778 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010779 * SAX: beginning of the document processing.
10780 */
10781 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10782 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010783 if (ctxt->instate == XML_PARSER_EOF)
10784 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010785
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010786 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010787 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010788 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010789 * Get the 4 first bytes and decode the charset
10790 * if enc != XML_CHAR_ENCODING_NONE
10791 * plug some encoding conversion routines.
10792 */
10793 start[0] = RAW;
10794 start[1] = NXT(1);
10795 start[2] = NXT(2);
10796 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010797 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010798 if (enc != XML_CHAR_ENCODING_NONE) {
10799 xmlSwitchEncoding(ctxt, enc);
10800 }
Owen Taylor3473f882001-02-23 17:55:21 +000010801 }
10802
10803
10804 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010805 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010806 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010807 }
10808
10809 /*
10810 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010811 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010812 * than just the first line, unless the amount of data is really
10813 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010814 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010815 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10816 GROW;
10817 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010818 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010819
10820 /*
10821 * Note that we will switch encoding on the fly.
10822 */
10823 xmlParseXMLDecl(ctxt);
Daniel Veillardafd27c22015-11-09 18:07:18 +080010824 if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10825 (ctxt->instate == XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010826 /*
10827 * The XML REC instructs us to stop parsing right here
10828 */
10829 return(-1);
10830 }
10831 ctxt->standalone = ctxt->input->standalone;
10832 SKIP_BLANKS;
10833 } else {
10834 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10835 }
10836 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10837 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010838 if (ctxt->instate == XML_PARSER_EOF)
10839 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010840 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10841 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10842 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10843 }
Owen Taylor3473f882001-02-23 17:55:21 +000010844
10845 /*
10846 * The Misc part of the Prolog
10847 */
10848 GROW;
10849 xmlParseMisc(ctxt);
10850
10851 /*
10852 * Then possibly doc type declaration(s) and more Misc
10853 * (doctypedecl Misc*)?
10854 */
10855 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010856 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010857
10858 ctxt->inSubset = 1;
10859 xmlParseDocTypeDecl(ctxt);
10860 if (RAW == '[') {
10861 ctxt->instate = XML_PARSER_DTD;
10862 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010863 if (ctxt->instate == XML_PARSER_EOF)
10864 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010865 }
10866
10867 /*
10868 * Create and update the external subset.
10869 */
10870 ctxt->inSubset = 2;
10871 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10872 (!ctxt->disableSAX))
10873 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10874 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010875 if (ctxt->instate == XML_PARSER_EOF)
10876 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010877 ctxt->inSubset = 0;
10878
Daniel Veillardac4118d2008-01-11 05:27:32 +000010879 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010880
10881 ctxt->instate = XML_PARSER_PROLOG;
10882 xmlParseMisc(ctxt);
10883 }
10884
10885 /*
10886 * Time to start parsing the tree itself
10887 */
10888 GROW;
10889 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010890 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10891 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010892 } else {
10893 ctxt->instate = XML_PARSER_CONTENT;
10894 xmlParseElement(ctxt);
10895 ctxt->instate = XML_PARSER_EPILOG;
10896
10897
10898 /*
10899 * The Misc part at the end
10900 */
10901 xmlParseMisc(ctxt);
10902
Daniel Veillard561b7f82002-03-20 21:55:57 +000010903 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010904 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010905 }
10906 ctxt->instate = XML_PARSER_EOF;
10907 }
10908
10909 /*
10910 * SAX: end of the document processing.
10911 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010912 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010913 ctxt->sax->endDocument(ctxt->userData);
10914
Daniel Veillard5997aca2002-03-18 18:36:20 +000010915 /*
10916 * Remove locally kept entity definitions if the tree was not built
10917 */
10918 if ((ctxt->myDoc != NULL) &&
10919 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10920 xmlFreeDoc(ctxt->myDoc);
10921 ctxt->myDoc = NULL;
10922 }
10923
Daniel Veillardae0765b2008-07-31 19:54:59 +000010924 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10925 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10926 if (ctxt->valid)
10927 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10928 if (ctxt->nsWellFormed)
10929 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10930 if (ctxt->options & XML_PARSE_OLD10)
10931 ctxt->myDoc->properties |= XML_DOC_OLD10;
10932 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010933 if (! ctxt->wellFormed) {
10934 ctxt->valid = 0;
10935 return(-1);
10936 }
Owen Taylor3473f882001-02-23 17:55:21 +000010937 return(0);
10938}
10939
10940/**
10941 * xmlParseExtParsedEnt:
10942 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010943 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010944 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010945 * An external general parsed entity is well-formed if it matches the
10946 * production labeled extParsedEnt.
10947 *
10948 * [78] extParsedEnt ::= TextDecl? content
10949 *
10950 * Returns 0, -1 in case of error. the parser context is augmented
10951 * as a result of the parsing.
10952 */
10953
10954int
10955xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10956 xmlChar start[4];
10957 xmlCharEncoding enc;
10958
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010959 if ((ctxt == NULL) || (ctxt->input == NULL))
10960 return(-1);
10961
Owen Taylor3473f882001-02-23 17:55:21 +000010962 xmlDefaultSAXHandlerInit();
10963
Daniel Veillard309f81d2003-09-23 09:02:53 +000010964 xmlDetectSAX2(ctxt);
10965
Owen Taylor3473f882001-02-23 17:55:21 +000010966 GROW;
10967
10968 /*
10969 * SAX: beginning of the document processing.
10970 */
10971 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10972 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10973
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010974 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010975 * Get the 4 first bytes and decode the charset
10976 * if enc != XML_CHAR_ENCODING_NONE
10977 * plug some encoding conversion routines.
10978 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010979 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10980 start[0] = RAW;
10981 start[1] = NXT(1);
10982 start[2] = NXT(2);
10983 start[3] = NXT(3);
10984 enc = xmlDetectCharEncoding(start, 4);
10985 if (enc != XML_CHAR_ENCODING_NONE) {
10986 xmlSwitchEncoding(ctxt, enc);
10987 }
Owen Taylor3473f882001-02-23 17:55:21 +000010988 }
10989
10990
10991 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010992 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010993 }
10994
10995 /*
10996 * Check for the XMLDecl in the Prolog.
10997 */
10998 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010999 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011000
11001 /*
11002 * Note that we will switch encoding on the fly.
11003 */
11004 xmlParseXMLDecl(ctxt);
11005 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11006 /*
11007 * The XML REC instructs us to stop parsing right here
11008 */
11009 return(-1);
11010 }
11011 SKIP_BLANKS;
11012 } else {
11013 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11014 }
11015 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11016 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011017 if (ctxt->instate == XML_PARSER_EOF)
11018 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000011019
11020 /*
11021 * Doing validity checking on chunk doesn't make sense
11022 */
11023 ctxt->instate = XML_PARSER_CONTENT;
11024 ctxt->validate = 0;
11025 ctxt->loadsubset = 0;
11026 ctxt->depth = 0;
11027
11028 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011029 if (ctxt->instate == XML_PARSER_EOF)
11030 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011031
Owen Taylor3473f882001-02-23 17:55:21 +000011032 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011033 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011034 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011035 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011036 }
11037
11038 /*
11039 * SAX: end of the document processing.
11040 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011041 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011042 ctxt->sax->endDocument(ctxt->userData);
11043
11044 if (! ctxt->wellFormed) return(-1);
11045 return(0);
11046}
11047
Daniel Veillard73b013f2003-09-30 12:36:01 +000011048#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011049/************************************************************************
11050 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011051 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000011052 * *
11053 ************************************************************************/
11054
11055/**
11056 * xmlParseLookupSequence:
11057 * @ctxt: an XML parser context
11058 * @first: the first char to lookup
11059 * @next: the next char to lookup or zero
11060 * @third: the next char to lookup or zero
11061 *
11062 * Try to find if a sequence (first, next, third) or just (first next) or
11063 * (first) is available in the input stream.
11064 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
11065 * to avoid rescanning sequences of bytes, it DOES change the state of the
11066 * parser, do not use liberally.
11067 *
11068 * Returns the index to the current parsing point if the full sequence
11069 * is available, -1 otherwise.
11070 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011071static int
Owen Taylor3473f882001-02-23 17:55:21 +000011072xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11073 xmlChar next, xmlChar third) {
11074 int base, len;
11075 xmlParserInputPtr in;
11076 const xmlChar *buf;
11077
11078 in = ctxt->input;
11079 if (in == NULL) return(-1);
11080 base = in->cur - in->base;
11081 if (base < 0) return(-1);
11082 if (ctxt->checkIndex > base)
11083 base = ctxt->checkIndex;
11084 if (in->buf == NULL) {
11085 buf = in->base;
11086 len = in->length;
11087 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011088 buf = xmlBufContent(in->buf->buffer);
11089 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011090 }
11091 /* take into account the sequence length */
11092 if (third) len -= 2;
11093 else if (next) len --;
11094 for (;base < len;base++) {
11095 if (buf[base] == first) {
11096 if (third != 0) {
11097 if ((buf[base + 1] != next) ||
11098 (buf[base + 2] != third)) continue;
11099 } else if (next != 0) {
11100 if (buf[base + 1] != next) continue;
11101 }
11102 ctxt->checkIndex = 0;
11103#ifdef DEBUG_PUSH
11104 if (next == 0)
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: lookup '%c' found at %d\n",
11107 first, base);
11108 else if (third == 0)
11109 xmlGenericError(xmlGenericErrorContext,
11110 "PP: lookup '%c%c' found at %d\n",
11111 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011112 else
Owen Taylor3473f882001-02-23 17:55:21 +000011113 xmlGenericError(xmlGenericErrorContext,
11114 "PP: lookup '%c%c%c' found at %d\n",
11115 first, next, third, base);
11116#endif
11117 return(base - (in->cur - in->base));
11118 }
11119 }
11120 ctxt->checkIndex = base;
11121#ifdef DEBUG_PUSH
11122 if (next == 0)
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: lookup '%c' failed\n", first);
11125 else if (third == 0)
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011128 else
Owen Taylor3473f882001-02-23 17:55:21 +000011129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: lookup '%c%c%c' failed\n", first, next, third);
11131#endif
11132 return(-1);
11133}
11134
11135/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011136 * xmlParseGetLasts:
11137 * @ctxt: an XML parser context
11138 * @lastlt: pointer to store the last '<' from the input
11139 * @lastgt: pointer to store the last '>' from the input
11140 *
11141 * Lookup the last < and > in the current chunk
11142 */
11143static void
11144xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11145 const xmlChar **lastgt) {
11146 const xmlChar *tmp;
11147
11148 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11149 xmlGenericError(xmlGenericErrorContext,
11150 "Internal error: xmlParseGetLasts\n");
11151 return;
11152 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011153 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011154 tmp = ctxt->input->end;
11155 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011156 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011157 if (tmp < ctxt->input->base) {
11158 *lastlt = NULL;
11159 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011160 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011161 *lastlt = tmp;
11162 tmp++;
11163 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11164 if (*tmp == '\'') {
11165 tmp++;
11166 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11167 if (tmp < ctxt->input->end) tmp++;
11168 } else if (*tmp == '"') {
11169 tmp++;
11170 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11171 if (tmp < ctxt->input->end) tmp++;
11172 } else
11173 tmp++;
11174 }
11175 if (tmp < ctxt->input->end)
11176 *lastgt = tmp;
11177 else {
11178 tmp = *lastlt;
11179 tmp--;
11180 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11181 if (tmp >= ctxt->input->base)
11182 *lastgt = tmp;
11183 else
11184 *lastgt = NULL;
11185 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011186 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011187 } else {
11188 *lastlt = NULL;
11189 *lastgt = NULL;
11190 }
11191}
11192/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011193 * xmlCheckCdataPush:
11194 * @cur: pointer to the bock of characters
11195 * @len: length of the block in bytes
11196 *
11197 * Check that the block of characters is okay as SCdata content [20]
11198 *
11199 * Returns the number of bytes to pass if okay, a negative index where an
11200 * UTF-8 error occured otherwise
11201 */
11202static int
11203xmlCheckCdataPush(const xmlChar *utf, int len) {
11204 int ix;
11205 unsigned char c;
11206 int codepoint;
11207
11208 if ((utf == NULL) || (len <= 0))
11209 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011210
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011211 for (ix = 0; ix < len;) { /* string is 0-terminated */
11212 c = utf[ix];
11213 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11214 if (c >= 0x20)
11215 ix++;
11216 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11217 ix++;
11218 else
11219 return(-ix);
11220 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
Daniel Veillard4a5d80a2015-09-18 15:06:46 +080011221 if (ix + 2 > len) return(-ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011222 if ((utf[ix+1] & 0xc0 ) != 0x80)
11223 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011224 codepoint = (utf[ix] & 0x1f) << 6;
11225 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011226 if (!xmlIsCharQ(codepoint))
11227 return(-ix);
11228 ix += 2;
11229 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
Daniel Veillard4a5d80a2015-09-18 15:06:46 +080011230 if (ix + 3 > len) return(-ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011231 if (((utf[ix+1] & 0xc0) != 0x80) ||
11232 ((utf[ix+2] & 0xc0) != 0x80))
11233 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011234 codepoint = (utf[ix] & 0xf) << 12;
11235 codepoint |= (utf[ix+1] & 0x3f) << 6;
11236 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011237 if (!xmlIsCharQ(codepoint))
11238 return(-ix);
11239 ix += 3;
11240 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
Daniel Veillard4a5d80a2015-09-18 15:06:46 +080011241 if (ix + 4 > len) return(-ix);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011242 if (((utf[ix+1] & 0xc0) != 0x80) ||
11243 ((utf[ix+2] & 0xc0) != 0x80) ||
11244 ((utf[ix+3] & 0xc0) != 0x80))
11245 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011246 codepoint = (utf[ix] & 0x7) << 18;
11247 codepoint |= (utf[ix+1] & 0x3f) << 12;
11248 codepoint |= (utf[ix+2] & 0x3f) << 6;
11249 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011250 if (!xmlIsCharQ(codepoint))
11251 return(-ix);
11252 ix += 4;
11253 } else /* unknown encoding */
11254 return(-ix);
11255 }
11256 return(ix);
11257}
11258
11259/**
Owen Taylor3473f882001-02-23 17:55:21 +000011260 * xmlParseTryOrFinish:
11261 * @ctxt: an XML parser context
11262 * @terminate: last chunk indicator
11263 *
11264 * Try to progress on parsing
11265 *
11266 * Returns zero if no parsing was possible
11267 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011268static int
Owen Taylor3473f882001-02-23 17:55:21 +000011269xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11270 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011271 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011272 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011273 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011274
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011275 if (ctxt->input == NULL)
11276 return(0);
11277
Owen Taylor3473f882001-02-23 17:55:21 +000011278#ifdef DEBUG_PUSH
11279 switch (ctxt->instate) {
11280 case XML_PARSER_EOF:
11281 xmlGenericError(xmlGenericErrorContext,
11282 "PP: try EOF\n"); break;
11283 case XML_PARSER_START:
11284 xmlGenericError(xmlGenericErrorContext,
11285 "PP: try START\n"); break;
11286 case XML_PARSER_MISC:
11287 xmlGenericError(xmlGenericErrorContext,
11288 "PP: try MISC\n");break;
11289 case XML_PARSER_COMMENT:
11290 xmlGenericError(xmlGenericErrorContext,
11291 "PP: try COMMENT\n");break;
11292 case XML_PARSER_PROLOG:
11293 xmlGenericError(xmlGenericErrorContext,
11294 "PP: try PROLOG\n");break;
11295 case XML_PARSER_START_TAG:
11296 xmlGenericError(xmlGenericErrorContext,
11297 "PP: try START_TAG\n");break;
11298 case XML_PARSER_CONTENT:
11299 xmlGenericError(xmlGenericErrorContext,
11300 "PP: try CONTENT\n");break;
11301 case XML_PARSER_CDATA_SECTION:
11302 xmlGenericError(xmlGenericErrorContext,
11303 "PP: try CDATA_SECTION\n");break;
11304 case XML_PARSER_END_TAG:
11305 xmlGenericError(xmlGenericErrorContext,
11306 "PP: try END_TAG\n");break;
11307 case XML_PARSER_ENTITY_DECL:
11308 xmlGenericError(xmlGenericErrorContext,
11309 "PP: try ENTITY_DECL\n");break;
11310 case XML_PARSER_ENTITY_VALUE:
11311 xmlGenericError(xmlGenericErrorContext,
11312 "PP: try ENTITY_VALUE\n");break;
11313 case XML_PARSER_ATTRIBUTE_VALUE:
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: try ATTRIBUTE_VALUE\n");break;
11316 case XML_PARSER_DTD:
11317 xmlGenericError(xmlGenericErrorContext,
11318 "PP: try DTD\n");break;
11319 case XML_PARSER_EPILOG:
11320 xmlGenericError(xmlGenericErrorContext,
11321 "PP: try EPILOG\n");break;
11322 case XML_PARSER_PI:
11323 xmlGenericError(xmlGenericErrorContext,
11324 "PP: try PI\n");break;
11325 case XML_PARSER_IGNORE:
11326 xmlGenericError(xmlGenericErrorContext,
11327 "PP: try IGNORE\n");break;
11328 }
11329#endif
11330
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011331 if ((ctxt->input != NULL) &&
11332 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011333 xmlSHRINK(ctxt);
11334 ctxt->checkIndex = 0;
11335 }
11336 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011337
Daniel Veillarde50ba812013-04-11 15:54:51 +080011338 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011339 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011340 return(0);
11341
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011342
Owen Taylor3473f882001-02-23 17:55:21 +000011343 /*
11344 * Pop-up of finished entities.
11345 */
11346 while ((RAW == 0) && (ctxt->inputNr > 1))
11347 xmlPopInput(ctxt);
11348
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011349 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011350 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011351 avail = ctxt->input->length -
11352 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011353 else {
11354 /*
11355 * If we are operating on converted input, try to flush
11356 * remaining chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011357 * buffer. But do not do this in document start where
11358 * encoding="..." may not have been read and we work on a
11359 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011360 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011361 if ((ctxt->instate != XML_PARSER_START) &&
11362 (ctxt->input->buf->raw != NULL) &&
11363 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011364 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11365 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011366 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011367
11368 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011369 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11370 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011371 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011372 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011373 (ctxt->input->cur - ctxt->input->base);
11374 }
Owen Taylor3473f882001-02-23 17:55:21 +000011375 if (avail < 1)
11376 goto done;
11377 switch (ctxt->instate) {
11378 case XML_PARSER_EOF:
11379 /*
11380 * Document parsing is done !
11381 */
11382 goto done;
11383 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011384 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11385 xmlChar start[4];
11386 xmlCharEncoding enc;
11387
11388 /*
11389 * Very first chars read from the document flow.
11390 */
11391 if (avail < 4)
11392 goto done;
11393
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011394 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011395 * Get the 4 first bytes and decode the charset
11396 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011397 * plug some encoding conversion routines,
11398 * else xmlSwitchEncoding will set to (default)
11399 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011400 */
11401 start[0] = RAW;
11402 start[1] = NXT(1);
11403 start[2] = NXT(2);
11404 start[3] = NXT(3);
11405 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011406 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011407 break;
11408 }
Owen Taylor3473f882001-02-23 17:55:21 +000011409
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011410 if (avail < 2)
11411 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011412 cur = ctxt->input->cur[0];
11413 next = ctxt->input->cur[1];
11414 if (cur == 0) {
11415 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11416 ctxt->sax->setDocumentLocator(ctxt->userData,
11417 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011418 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011419 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011420#ifdef DEBUG_PUSH
11421 xmlGenericError(xmlGenericErrorContext,
11422 "PP: entering EOF\n");
11423#endif
11424 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11425 ctxt->sax->endDocument(ctxt->userData);
11426 goto done;
11427 }
11428 if ((cur == '<') && (next == '?')) {
11429 /* PI or XML decl */
11430 if (avail < 5) return(ret);
11431 if ((!terminate) &&
11432 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11433 return(ret);
11434 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11435 ctxt->sax->setDocumentLocator(ctxt->userData,
11436 &xmlDefaultSAXLocator);
11437 if ((ctxt->input->cur[2] == 'x') &&
11438 (ctxt->input->cur[3] == 'm') &&
11439 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011440 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011441 ret += 5;
11442#ifdef DEBUG_PUSH
11443 xmlGenericError(xmlGenericErrorContext,
11444 "PP: Parsing XML Decl\n");
11445#endif
11446 xmlParseXMLDecl(ctxt);
11447 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11448 /*
11449 * The XML REC instructs us to stop parsing right
11450 * here
11451 */
Daniel Veillarde3b15972015-11-20 14:59:30 +080011452 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011453 return(0);
11454 }
11455 ctxt->standalone = ctxt->input->standalone;
11456 if ((ctxt->encoding == NULL) &&
11457 (ctxt->input->encoding != NULL))
11458 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11459 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11460 (!ctxt->disableSAX))
11461 ctxt->sax->startDocument(ctxt->userData);
11462 ctxt->instate = XML_PARSER_MISC;
11463#ifdef DEBUG_PUSH
11464 xmlGenericError(xmlGenericErrorContext,
11465 "PP: entering MISC\n");
11466#endif
11467 } else {
11468 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11469 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11470 (!ctxt->disableSAX))
11471 ctxt->sax->startDocument(ctxt->userData);
11472 ctxt->instate = XML_PARSER_MISC;
11473#ifdef DEBUG_PUSH
11474 xmlGenericError(xmlGenericErrorContext,
11475 "PP: entering MISC\n");
11476#endif
11477 }
11478 } else {
11479 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11480 ctxt->sax->setDocumentLocator(ctxt->userData,
11481 &xmlDefaultSAXLocator);
11482 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011483 if (ctxt->version == NULL) {
11484 xmlErrMemory(ctxt, NULL);
11485 break;
11486 }
Owen Taylor3473f882001-02-23 17:55:21 +000011487 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11488 (!ctxt->disableSAX))
11489 ctxt->sax->startDocument(ctxt->userData);
11490 ctxt->instate = XML_PARSER_MISC;
11491#ifdef DEBUG_PUSH
11492 xmlGenericError(xmlGenericErrorContext,
11493 "PP: entering MISC\n");
11494#endif
11495 }
11496 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011497 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011498 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011499 const xmlChar *prefix = NULL;
11500 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011501 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011502
11503 if ((avail < 2) && (ctxt->inputNr == 1))
11504 goto done;
11505 cur = ctxt->input->cur[0];
11506 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011507 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011508 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011509 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11510 ctxt->sax->endDocument(ctxt->userData);
11511 goto done;
11512 }
11513 if (!terminate) {
11514 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011515 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011516 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011517 goto done;
11518 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11519 goto done;
11520 }
11521 }
11522 if (ctxt->spaceNr == 0)
11523 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011524 else if (*ctxt->space == -2)
11525 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011526 else
11527 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011528#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011529 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011530#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011531 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011532#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011533 else
11534 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011535#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011536 if (ctxt->instate == XML_PARSER_EOF)
11537 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011538 if (name == NULL) {
11539 spacePop(ctxt);
Daniel Veillarde3b15972015-11-20 14:59:30 +080011540 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011541 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11542 ctxt->sax->endDocument(ctxt->userData);
11543 goto done;
11544 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011545#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011546 /*
11547 * [ VC: Root Element Type ]
11548 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011549 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011550 */
11551 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11552 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11553 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011554#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011555
11556 /*
11557 * Check for an Empty Element.
11558 */
11559 if ((RAW == '/') && (NXT(1) == '>')) {
11560 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011561
11562 if (ctxt->sax2) {
11563 if ((ctxt->sax != NULL) &&
11564 (ctxt->sax->endElementNs != NULL) &&
11565 (!ctxt->disableSAX))
11566 ctxt->sax->endElementNs(ctxt->userData, name,
11567 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011568 if (ctxt->nsNr - nsNr > 0)
11569 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011570#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011571 } else {
11572 if ((ctxt->sax != NULL) &&
11573 (ctxt->sax->endElement != NULL) &&
11574 (!ctxt->disableSAX))
11575 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011576#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011577 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011578 if (ctxt->instate == XML_PARSER_EOF)
11579 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011580 spacePop(ctxt);
11581 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011582 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011583 } else {
11584 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011585 }
Daniel Veillard65686452012-07-19 18:25:01 +080011586 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011587 break;
11588 }
11589 if (RAW == '>') {
11590 NEXT;
11591 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011592 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011593 "Couldn't find end of Start Tag %s\n",
11594 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011595 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011596 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011597 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011598 if (ctxt->sax2)
11599 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011600#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011601 else
11602 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011603#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011604
Daniel Veillarda880b122003-04-21 21:36:41 +000011605 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011606 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011607 break;
11608 }
11609 case XML_PARSER_CONTENT: {
11610 const xmlChar *test;
11611 unsigned int cons;
11612 if ((avail < 2) && (ctxt->inputNr == 1))
11613 goto done;
11614 cur = ctxt->input->cur[0];
11615 next = ctxt->input->cur[1];
11616
11617 test = CUR_PTR;
11618 cons = ctxt->input->consumed;
11619 if ((cur == '<') && (next == '/')) {
11620 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011621 break;
11622 } else if ((cur == '<') && (next == '?')) {
11623 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011624 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11625 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011626 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011627 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011628 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011629 ctxt->instate = XML_PARSER_CONTENT;
11630 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011631 } else if ((cur == '<') && (next != '!')) {
11632 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011633 break;
11634 } else if ((cur == '<') && (next == '!') &&
11635 (ctxt->input->cur[2] == '-') &&
11636 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011637 int term;
11638
11639 if (avail < 4)
11640 goto done;
11641 ctxt->input->cur += 4;
11642 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11643 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011644 if ((!terminate) && (term < 0)) {
11645 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011646 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011647 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011648 xmlParseComment(ctxt);
11649 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011650 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011651 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11652 (ctxt->input->cur[2] == '[') &&
11653 (ctxt->input->cur[3] == 'C') &&
11654 (ctxt->input->cur[4] == 'D') &&
11655 (ctxt->input->cur[5] == 'A') &&
11656 (ctxt->input->cur[6] == 'T') &&
11657 (ctxt->input->cur[7] == 'A') &&
11658 (ctxt->input->cur[8] == '[')) {
11659 SKIP(9);
11660 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011661 break;
11662 } else if ((cur == '<') && (next == '!') &&
11663 (avail < 9)) {
11664 goto done;
11665 } else if (cur == '&') {
11666 if ((!terminate) &&
11667 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11668 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011669 xmlParseReference(ctxt);
11670 } else {
11671 /* TODO Avoid the extra copy, handle directly !!! */
11672 /*
11673 * Goal of the following test is:
11674 * - minimize calls to the SAX 'character' callback
11675 * when they are mergeable
11676 * - handle a problem for isBlank when we only parse
11677 * a sequence of blank chars and the next one is
11678 * not available to check against '<' presence.
11679 * - tries to homogenize the differences in SAX
11680 * callbacks between the push and pull versions
11681 * of the parser.
11682 */
11683 if ((ctxt->inputNr == 1) &&
11684 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11685 if (!terminate) {
11686 if (ctxt->progressive) {
11687 if ((lastlt == NULL) ||
11688 (ctxt->input->cur > lastlt))
11689 goto done;
11690 } else if (xmlParseLookupSequence(ctxt,
11691 '<', 0, 0) < 0) {
11692 goto done;
11693 }
11694 }
11695 }
11696 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011697 xmlParseCharData(ctxt, 0);
11698 }
11699 /*
11700 * Pop-up of finished entities.
11701 */
11702 while ((RAW == 0) && (ctxt->inputNr > 1))
11703 xmlPopInput(ctxt);
11704 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011705 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11706 "detected an error in element content\n");
Daniel Veillarde3b15972015-11-20 14:59:30 +080011707 xmlHaltParser(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011708 break;
11709 }
11710 break;
11711 }
11712 case XML_PARSER_END_TAG:
11713 if (avail < 2)
11714 goto done;
11715 if (!terminate) {
11716 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011717 /* > can be found unescaped in attribute values */
11718 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011719 goto done;
11720 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11721 goto done;
11722 }
11723 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011724 if (ctxt->sax2) {
11725 xmlParseEndTag2(ctxt,
11726 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11727 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011728 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011729 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011730 }
11731#ifdef LIBXML_SAX1_ENABLED
11732 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011733 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011734#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011735 if (ctxt->instate == XML_PARSER_EOF) {
11736 /* Nothing */
11737 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011738 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011739 } else {
11740 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011741 }
11742 break;
11743 case XML_PARSER_CDATA_SECTION: {
11744 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011745 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011746 * cdataBlock merge back contiguous callbacks.
11747 */
11748 int base;
11749
11750 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11751 if (base < 0) {
11752 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011753 int tmp;
11754
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011755 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011756 XML_PARSER_BIG_BUFFER_SIZE);
11757 if (tmp < 0) {
11758 tmp = -tmp;
11759 ctxt->input->cur += tmp;
11760 goto encoding_error;
11761 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011762 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11763 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011764 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011765 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011766 else if (ctxt->sax->characters != NULL)
11767 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011768 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011769 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011770 if (ctxt->instate == XML_PARSER_EOF)
11771 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011772 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011773 ctxt->checkIndex = 0;
11774 }
11775 goto done;
11776 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011777 int tmp;
11778
11779 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11780 if ((tmp < 0) || (tmp != base)) {
11781 tmp = -tmp;
11782 ctxt->input->cur += tmp;
11783 goto encoding_error;
11784 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011785 if ((ctxt->sax != NULL) && (base == 0) &&
11786 (ctxt->sax->cdataBlock != NULL) &&
11787 (!ctxt->disableSAX)) {
11788 /*
11789 * Special case to provide identical behaviour
11790 * between pull and push parsers on empty CDATA
11791 * sections
11792 */
11793 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11794 (!strncmp((const char *)&ctxt->input->cur[-9],
11795 "<![CDATA[", 9)))
11796 ctxt->sax->cdataBlock(ctxt->userData,
11797 BAD_CAST "", 0);
11798 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011799 (!ctxt->disableSAX)) {
11800 if (ctxt->sax->cdataBlock != NULL)
11801 ctxt->sax->cdataBlock(ctxt->userData,
11802 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011803 else if (ctxt->sax->characters != NULL)
11804 ctxt->sax->characters(ctxt->userData,
11805 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011806 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011807 if (ctxt->instate == XML_PARSER_EOF)
11808 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011809 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011810 ctxt->checkIndex = 0;
11811 ctxt->instate = XML_PARSER_CONTENT;
11812#ifdef DEBUG_PUSH
11813 xmlGenericError(xmlGenericErrorContext,
11814 "PP: entering CONTENT\n");
11815#endif
11816 }
11817 break;
11818 }
Owen Taylor3473f882001-02-23 17:55:21 +000011819 case XML_PARSER_MISC:
11820 SKIP_BLANKS;
11821 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011822 avail = ctxt->input->length -
11823 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011824 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011825 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011826 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011827 if (avail < 2)
11828 goto done;
11829 cur = ctxt->input->cur[0];
11830 next = ctxt->input->cur[1];
11831 if ((cur == '<') && (next == '?')) {
11832 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011833 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11834 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011835 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011836 }
Owen Taylor3473f882001-02-23 17:55:21 +000011837#ifdef DEBUG_PUSH
11838 xmlGenericError(xmlGenericErrorContext,
11839 "PP: Parsing PI\n");
11840#endif
11841 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011842 if (ctxt->instate == XML_PARSER_EOF)
11843 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011844 ctxt->instate = XML_PARSER_MISC;
11845 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011846 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011847 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011848 (ctxt->input->cur[2] == '-') &&
11849 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011850 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011851 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11852 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011853 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011854 }
Owen Taylor3473f882001-02-23 17:55:21 +000011855#ifdef DEBUG_PUSH
11856 xmlGenericError(xmlGenericErrorContext,
11857 "PP: Parsing Comment\n");
11858#endif
11859 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011860 if (ctxt->instate == XML_PARSER_EOF)
11861 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011862 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011863 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011864 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011865 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011866 (ctxt->input->cur[2] == 'D') &&
11867 (ctxt->input->cur[3] == 'O') &&
11868 (ctxt->input->cur[4] == 'C') &&
11869 (ctxt->input->cur[5] == 'T') &&
11870 (ctxt->input->cur[6] == 'Y') &&
11871 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011872 (ctxt->input->cur[8] == 'E')) {
11873 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011874 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11875 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011876 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011877 }
Owen Taylor3473f882001-02-23 17:55:21 +000011878#ifdef DEBUG_PUSH
11879 xmlGenericError(xmlGenericErrorContext,
11880 "PP: Parsing internal subset\n");
11881#endif
11882 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011883 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011884 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011885 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011886 if (ctxt->instate == XML_PARSER_EOF)
11887 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011888 if (RAW == '[') {
11889 ctxt->instate = XML_PARSER_DTD;
11890#ifdef DEBUG_PUSH
11891 xmlGenericError(xmlGenericErrorContext,
11892 "PP: entering DTD\n");
11893#endif
11894 } else {
11895 /*
11896 * Create and update the external subset.
11897 */
11898 ctxt->inSubset = 2;
11899 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11900 (ctxt->sax->externalSubset != NULL))
11901 ctxt->sax->externalSubset(ctxt->userData,
11902 ctxt->intSubName, ctxt->extSubSystem,
11903 ctxt->extSubURI);
11904 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011905 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011906 ctxt->instate = XML_PARSER_PROLOG;
11907#ifdef DEBUG_PUSH
11908 xmlGenericError(xmlGenericErrorContext,
11909 "PP: entering PROLOG\n");
11910#endif
11911 }
11912 } else if ((cur == '<') && (next == '!') &&
11913 (avail < 9)) {
11914 goto done;
11915 } else {
11916 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011917 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011918 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011919#ifdef DEBUG_PUSH
11920 xmlGenericError(xmlGenericErrorContext,
11921 "PP: entering START_TAG\n");
11922#endif
11923 }
11924 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011925 case XML_PARSER_PROLOG:
11926 SKIP_BLANKS;
11927 if (ctxt->input->buf == NULL)
11928 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11929 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011930 avail = xmlBufUse(ctxt->input->buf->buffer) -
11931 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011932 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011933 goto done;
11934 cur = ctxt->input->cur[0];
11935 next = ctxt->input->cur[1];
11936 if ((cur == '<') && (next == '?')) {
11937 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011938 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11939 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011940 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011941 }
Owen Taylor3473f882001-02-23 17:55:21 +000011942#ifdef DEBUG_PUSH
11943 xmlGenericError(xmlGenericErrorContext,
11944 "PP: Parsing PI\n");
11945#endif
11946 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011947 if (ctxt->instate == XML_PARSER_EOF)
11948 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011949 ctxt->instate = XML_PARSER_PROLOG;
11950 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011951 } else if ((cur == '<') && (next == '!') &&
11952 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11953 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011954 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11955 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011956 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011957 }
Owen Taylor3473f882001-02-23 17:55:21 +000011958#ifdef DEBUG_PUSH
11959 xmlGenericError(xmlGenericErrorContext,
11960 "PP: Parsing Comment\n");
11961#endif
11962 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011963 if (ctxt->instate == XML_PARSER_EOF)
11964 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011965 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011966 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011967 } else if ((cur == '<') && (next == '!') &&
11968 (avail < 4)) {
11969 goto done;
11970 } else {
11971 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011972 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011973 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011974 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011975#ifdef DEBUG_PUSH
11976 xmlGenericError(xmlGenericErrorContext,
11977 "PP: entering START_TAG\n");
11978#endif
11979 }
11980 break;
11981 case XML_PARSER_EPILOG:
11982 SKIP_BLANKS;
11983 if (ctxt->input->buf == NULL)
11984 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11985 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011986 avail = xmlBufUse(ctxt->input->buf->buffer) -
11987 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011988 if (avail < 2)
11989 goto done;
11990 cur = ctxt->input->cur[0];
11991 next = ctxt->input->cur[1];
11992 if ((cur == '<') && (next == '?')) {
11993 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011994 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11995 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011996 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011997 }
Owen Taylor3473f882001-02-23 17:55:21 +000011998#ifdef DEBUG_PUSH
11999 xmlGenericError(xmlGenericErrorContext,
12000 "PP: Parsing PI\n");
12001#endif
12002 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012003 if (ctxt->instate == XML_PARSER_EOF)
12004 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012005 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080012006 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012007 } else if ((cur == '<') && (next == '!') &&
12008 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
12009 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080012010 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
12011 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012012 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080012013 }
Owen Taylor3473f882001-02-23 17:55:21 +000012014#ifdef DEBUG_PUSH
12015 xmlGenericError(xmlGenericErrorContext,
12016 "PP: Parsing Comment\n");
12017#endif
12018 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012019 if (ctxt->instate == XML_PARSER_EOF)
12020 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012021 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080012022 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000012023 } else if ((cur == '<') && (next == '!') &&
12024 (avail < 4)) {
12025 goto done;
12026 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012027 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillarde3b15972015-11-20 14:59:30 +080012028 xmlHaltParser(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012029#ifdef DEBUG_PUSH
12030 xmlGenericError(xmlGenericErrorContext,
12031 "PP: entering EOF\n");
12032#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012033 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012034 ctxt->sax->endDocument(ctxt->userData);
12035 goto done;
12036 }
12037 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012038 case XML_PARSER_DTD: {
12039 /*
12040 * Sorry but progressive parsing of the internal subset
12041 * is not expected to be supported. We first check that
12042 * the full content of the internal subset is available and
12043 * the parsing is launched only at that point.
12044 * Internal subset ends up with "']' S? '>'" in an unescaped
12045 * section and not in a ']]>' sequence which are conditional
12046 * sections (whoever argued to keep that crap in XML deserve
12047 * a place in hell !).
12048 */
12049 int base, i;
12050 xmlChar *buf;
12051 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012052 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000012053
12054 base = ctxt->input->cur - ctxt->input->base;
12055 if (base < 0) return(0);
12056 if (ctxt->checkIndex > base)
12057 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012058 buf = xmlBufContent(ctxt->input->buf->buffer);
12059 use = xmlBufUse(ctxt->input->buf->buffer);
12060 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000012061 if (quote != 0) {
12062 if (buf[base] == quote)
12063 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012064 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000012065 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012066 if ((quote == 0) && (buf[base] == '<')) {
12067 int found = 0;
12068 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012069 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000012070 (buf[base + 1] == '!') &&
12071 (buf[base + 2] == '-') &&
12072 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012073 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000012074 if ((buf[base] == '-') &&
12075 (buf[base + 1] == '-') &&
12076 (buf[base + 2] == '>')) {
12077 found = 1;
12078 base += 2;
12079 break;
12080 }
12081 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012082 if (!found) {
12083#if 0
12084 fprintf(stderr, "unfinished comment\n");
12085#endif
12086 break; /* for */
12087 }
Daniel Veillard036143b2004-02-12 11:57:52 +000012088 continue;
12089 }
12090 }
Owen Taylor3473f882001-02-23 17:55:21 +000012091 if (buf[base] == '"') {
12092 quote = '"';
12093 continue;
12094 }
12095 if (buf[base] == '\'') {
12096 quote = '\'';
12097 continue;
12098 }
12099 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012100#if 0
12101 fprintf(stderr, "%c%c%c%c: ", buf[base],
12102 buf[base + 1], buf[base + 2], buf[base + 3]);
12103#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012104 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012105 break;
12106 if (buf[base + 1] == ']') {
12107 /* conditional crap, skip both ']' ! */
12108 base++;
12109 continue;
12110 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012111 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012112 if (buf[base + i] == '>') {
12113#if 0
12114 fprintf(stderr, "found\n");
12115#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012116 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012117 }
12118 if (!IS_BLANK_CH(buf[base + i])) {
12119#if 0
12120 fprintf(stderr, "not found\n");
12121#endif
12122 goto not_end_of_int_subset;
12123 }
Owen Taylor3473f882001-02-23 17:55:21 +000012124 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012125#if 0
12126 fprintf(stderr, "end of stream\n");
12127#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012128 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012129
Owen Taylor3473f882001-02-23 17:55:21 +000012130 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012131not_end_of_int_subset:
12132 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012133 }
12134 /*
12135 * We didn't found the end of the Internal subset
12136 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012137 if (quote == 0)
12138 ctxt->checkIndex = base;
12139 else
12140 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012141#ifdef DEBUG_PUSH
12142 if (next == 0)
12143 xmlGenericError(xmlGenericErrorContext,
12144 "PP: lookup of int subset end filed\n");
12145#endif
12146 goto done;
12147
12148found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012149 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012150 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012151 if (ctxt->instate == XML_PARSER_EOF)
12152 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012153 ctxt->inSubset = 2;
12154 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12155 (ctxt->sax->externalSubset != NULL))
12156 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12157 ctxt->extSubSystem, ctxt->extSubURI);
12158 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012159 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012160 if (ctxt->instate == XML_PARSER_EOF)
12161 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012162 ctxt->instate = XML_PARSER_PROLOG;
12163 ctxt->checkIndex = 0;
12164#ifdef DEBUG_PUSH
12165 xmlGenericError(xmlGenericErrorContext,
12166 "PP: entering PROLOG\n");
12167#endif
12168 break;
12169 }
12170 case XML_PARSER_COMMENT:
12171 xmlGenericError(xmlGenericErrorContext,
12172 "PP: internal error, state == COMMENT\n");
12173 ctxt->instate = XML_PARSER_CONTENT;
12174#ifdef DEBUG_PUSH
12175 xmlGenericError(xmlGenericErrorContext,
12176 "PP: entering CONTENT\n");
12177#endif
12178 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012179 case XML_PARSER_IGNORE:
12180 xmlGenericError(xmlGenericErrorContext,
12181 "PP: internal error, state == IGNORE");
12182 ctxt->instate = XML_PARSER_DTD;
12183#ifdef DEBUG_PUSH
12184 xmlGenericError(xmlGenericErrorContext,
12185 "PP: entering DTD\n");
12186#endif
12187 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012188 case XML_PARSER_PI:
12189 xmlGenericError(xmlGenericErrorContext,
12190 "PP: internal error, state == PI\n");
12191 ctxt->instate = XML_PARSER_CONTENT;
12192#ifdef DEBUG_PUSH
12193 xmlGenericError(xmlGenericErrorContext,
12194 "PP: entering CONTENT\n");
12195#endif
12196 break;
12197 case XML_PARSER_ENTITY_DECL:
12198 xmlGenericError(xmlGenericErrorContext,
12199 "PP: internal error, state == ENTITY_DECL\n");
12200 ctxt->instate = XML_PARSER_DTD;
12201#ifdef DEBUG_PUSH
12202 xmlGenericError(xmlGenericErrorContext,
12203 "PP: entering DTD\n");
12204#endif
12205 break;
12206 case XML_PARSER_ENTITY_VALUE:
12207 xmlGenericError(xmlGenericErrorContext,
12208 "PP: internal error, state == ENTITY_VALUE\n");
12209 ctxt->instate = XML_PARSER_CONTENT;
12210#ifdef DEBUG_PUSH
12211 xmlGenericError(xmlGenericErrorContext,
12212 "PP: entering DTD\n");
12213#endif
12214 break;
12215 case XML_PARSER_ATTRIBUTE_VALUE:
12216 xmlGenericError(xmlGenericErrorContext,
12217 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12218 ctxt->instate = XML_PARSER_START_TAG;
12219#ifdef DEBUG_PUSH
12220 xmlGenericError(xmlGenericErrorContext,
12221 "PP: entering START_TAG\n");
12222#endif
12223 break;
12224 case XML_PARSER_SYSTEM_LITERAL:
12225 xmlGenericError(xmlGenericErrorContext,
12226 "PP: internal error, state == SYSTEM_LITERAL\n");
12227 ctxt->instate = XML_PARSER_START_TAG;
12228#ifdef DEBUG_PUSH
12229 xmlGenericError(xmlGenericErrorContext,
12230 "PP: entering START_TAG\n");
12231#endif
12232 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012233 case XML_PARSER_PUBLIC_LITERAL:
12234 xmlGenericError(xmlGenericErrorContext,
12235 "PP: internal error, state == PUBLIC_LITERAL\n");
12236 ctxt->instate = XML_PARSER_START_TAG;
12237#ifdef DEBUG_PUSH
12238 xmlGenericError(xmlGenericErrorContext,
12239 "PP: entering START_TAG\n");
12240#endif
12241 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012242 }
12243 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012244done:
Owen Taylor3473f882001-02-23 17:55:21 +000012245#ifdef DEBUG_PUSH
12246 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12247#endif
12248 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012249encoding_error:
12250 {
12251 char buffer[150];
12252
12253 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12254 ctxt->input->cur[0], ctxt->input->cur[1],
12255 ctxt->input->cur[2], ctxt->input->cur[3]);
12256 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12257 "Input is not proper UTF-8, indicate encoding !\n%s",
12258 BAD_CAST buffer, NULL);
12259 }
12260 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012261}
12262
12263/**
Daniel Veillard65686452012-07-19 18:25:01 +080012264 * xmlParseCheckTransition:
12265 * @ctxt: an XML parser context
12266 * @chunk: a char array
12267 * @size: the size in byte of the chunk
12268 *
12269 * Check depending on the current parser state if the chunk given must be
12270 * processed immediately or one need more data to advance on parsing.
12271 *
12272 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12273 */
12274static int
12275xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12276 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12277 return(-1);
12278 if (ctxt->instate == XML_PARSER_START_TAG) {
12279 if (memchr(chunk, '>', size) != NULL)
12280 return(1);
12281 return(0);
12282 }
12283 if (ctxt->progressive == XML_PARSER_COMMENT) {
12284 if (memchr(chunk, '>', size) != NULL)
12285 return(1);
12286 return(0);
12287 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012288 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12289 if (memchr(chunk, '>', size) != NULL)
12290 return(1);
12291 return(0);
12292 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012293 if (ctxt->progressive == XML_PARSER_PI) {
12294 if (memchr(chunk, '>', size) != NULL)
12295 return(1);
12296 return(0);
12297 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012298 if (ctxt->instate == XML_PARSER_END_TAG) {
12299 if (memchr(chunk, '>', size) != NULL)
12300 return(1);
12301 return(0);
12302 }
12303 if ((ctxt->progressive == XML_PARSER_DTD) ||
12304 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012305 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012306 return(1);
12307 return(0);
12308 }
Daniel Veillard65686452012-07-19 18:25:01 +080012309 return(1);
12310}
12311
12312/**
Owen Taylor3473f882001-02-23 17:55:21 +000012313 * xmlParseChunk:
12314 * @ctxt: an XML parser context
12315 * @chunk: an char array
12316 * @size: the size in byte of the chunk
12317 * @terminate: last chunk indicator
12318 *
12319 * Parse a Chunk of memory
12320 *
12321 * Returns zero if no error, the xmlParserErrors otherwise.
12322 */
12323int
12324xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12325 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012326 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012327 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012328 size_t old_avail = 0;
12329 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012330
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012331 if (ctxt == NULL)
12332 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012333 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012334 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012335 if (ctxt->instate == XML_PARSER_EOF)
12336 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012337 if (ctxt->instate == XML_PARSER_START)
12338 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012339 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12340 (chunk[size - 1] == '\r')) {
12341 end_in_lf = 1;
12342 size--;
12343 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012344
12345xmldecl_done:
12346
Owen Taylor3473f882001-02-23 17:55:21 +000012347 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12348 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012349 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12350 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012351 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012352
Daniel Veillard65686452012-07-19 18:25:01 +080012353 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012354 /*
12355 * Specific handling if we autodetected an encoding, we should not
12356 * push more than the first line ... which depend on the encoding
12357 * And only push the rest once the final encoding was detected
12358 */
12359 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12360 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012361 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012362
12363 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12364 BAD_CAST "UTF-16")) ||
12365 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12366 BAD_CAST "UTF16")))
12367 len = 90;
12368 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12369 BAD_CAST "UCS-4")) ||
12370 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12371 BAD_CAST "UCS4")))
12372 len = 180;
12373
12374 if (ctxt->input->buf->rawconsumed < len)
12375 len -= ctxt->input->buf->rawconsumed;
12376
Raul Hudeaba9716a2010-03-15 10:13:29 +010012377 /*
12378 * Change size for reading the initial declaration only
12379 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12380 * will blindly copy extra bytes from memory.
12381 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012382 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012383 remain = size - len;
12384 size = len;
12385 } else {
12386 remain = 0;
12387 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012388 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012389 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012390 if (res < 0) {
12391 ctxt->errNo = XML_PARSER_EOF;
Daniel Veillarde3b15972015-11-20 14:59:30 +080012392 xmlHaltParser(ctxt);
William M. Bracka3215c72004-07-31 16:24:01 +000012393 return (XML_PARSER_EOF);
12394 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012395 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012396#ifdef DEBUG_PUSH
12397 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12398#endif
12399
Owen Taylor3473f882001-02-23 17:55:21 +000012400 } else if (ctxt->instate != XML_PARSER_EOF) {
12401 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12402 xmlParserInputBufferPtr in = ctxt->input->buf;
12403 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12404 (in->raw != NULL)) {
12405 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012406 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12407 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012408
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012409 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012410 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012411 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012412 xmlGenericError(xmlGenericErrorContext,
12413 "xmlParseChunk: encoder error\n");
12414 return(XML_ERR_INVALID_ENCODING);
12415 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012416 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012417 }
12418 }
12419 }
Daniel Veillard65686452012-07-19 18:25:01 +080012420 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012421 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012422 } else {
12423 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12424 avail = xmlBufUse(ctxt->input->buf->buffer);
12425 /*
12426 * Depending on the current state it may not be such
12427 * a good idea to try parsing if there is nothing in the chunk
12428 * which would be worth doing a parser state transition and we
12429 * need to wait for more data
12430 */
12431 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12432 (old_avail == 0) || (avail == 0) ||
12433 (xmlParseCheckTransition(ctxt,
12434 (const char *)&ctxt->input->base[old_avail],
12435 avail - old_avail)))
12436 xmlParseTryOrFinish(ctxt, terminate);
12437 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012438 if (ctxt->instate == XML_PARSER_EOF)
12439 return(ctxt->errNo);
12440
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012441 if ((ctxt->input != NULL) &&
12442 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12443 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12444 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12445 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillarde3b15972015-11-20 14:59:30 +080012446 xmlHaltParser(ctxt);
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012447 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012448 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12449 return(ctxt->errNo);
12450
12451 if (remain != 0) {
12452 chunk += size;
12453 size = remain;
12454 remain = 0;
12455 goto xmldecl_done;
12456 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012457 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12458 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012459 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12460 ctxt->input);
12461 size_t current = ctxt->input->cur - ctxt->input->base;
12462
Daniel Veillarda617e242006-01-09 14:38:44 +000012463 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012464
12465 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12466 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012467 }
Owen Taylor3473f882001-02-23 17:55:21 +000012468 if (terminate) {
12469 /*
12470 * Check for termination
12471 */
Daniel Veillard65686452012-07-19 18:25:01 +080012472 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012473
12474 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012475 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012476 cur_avail = ctxt->input->length -
12477 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012478 else
Daniel Veillard65686452012-07-19 18:25:01 +080012479 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12480 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012481 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012482
Owen Taylor3473f882001-02-23 17:55:21 +000012483 if ((ctxt->instate != XML_PARSER_EOF) &&
12484 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012485 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012486 }
Daniel Veillard65686452012-07-19 18:25:01 +080012487 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012488 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012489 }
Owen Taylor3473f882001-02-23 17:55:21 +000012490 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012491 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012492 ctxt->sax->endDocument(ctxt->userData);
12493 }
12494 ctxt->instate = XML_PARSER_EOF;
12495 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012496 if (ctxt->wellFormed == 0)
12497 return((xmlParserErrors) ctxt->errNo);
12498 else
12499 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012500}
12501
12502/************************************************************************
12503 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012504 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012505 * *
12506 ************************************************************************/
12507
12508/**
Owen Taylor3473f882001-02-23 17:55:21 +000012509 * xmlCreatePushParserCtxt:
12510 * @sax: a SAX handler
12511 * @user_data: The user data returned on SAX callbacks
12512 * @chunk: a pointer to an array of chars
12513 * @size: number of chars in the array
12514 * @filename: an optional file name or URI
12515 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012516 * Create a parser context for using the XML parser in push mode.
12517 * If @buffer and @size are non-NULL, the data is used to detect
12518 * the encoding. The remaining characters will be parsed so they
12519 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012520 * To allow content encoding detection, @size should be >= 4
12521 * The value of @filename is used for fetching external entities
12522 * and error/warning reports.
12523 *
12524 * Returns the new parser context or NULL
12525 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012526
Owen Taylor3473f882001-02-23 17:55:21 +000012527xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012528xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012529 const char *chunk, int size, const char *filename) {
12530 xmlParserCtxtPtr ctxt;
12531 xmlParserInputPtr inputStream;
12532 xmlParserInputBufferPtr buf;
12533 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12534
12535 /*
12536 * plug some encoding conversion routines
12537 */
12538 if ((chunk != NULL) && (size >= 4))
12539 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12540
12541 buf = xmlAllocParserInputBuffer(enc);
12542 if (buf == NULL) return(NULL);
12543
12544 ctxt = xmlNewParserCtxt();
12545 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012546 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012547 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012548 return(NULL);
12549 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012550 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012551 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12552 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012553 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012554 xmlFreeParserInputBuffer(buf);
12555 xmlFreeParserCtxt(ctxt);
12556 return(NULL);
12557 }
Owen Taylor3473f882001-02-23 17:55:21 +000012558 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012559#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012560 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012561#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012562 xmlFree(ctxt->sax);
12563 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12564 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012565 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012566 xmlFreeParserInputBuffer(buf);
12567 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012568 return(NULL);
12569 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012570 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12571 if (sax->initialized == XML_SAX2_MAGIC)
12572 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12573 else
12574 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012575 if (user_data != NULL)
12576 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012577 }
Owen Taylor3473f882001-02-23 17:55:21 +000012578 if (filename == NULL) {
12579 ctxt->directory = NULL;
12580 } else {
12581 ctxt->directory = xmlParserGetDirectory(filename);
12582 }
12583
12584 inputStream = xmlNewInputStream(ctxt);
12585 if (inputStream == NULL) {
12586 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012587 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012588 return(NULL);
12589 }
12590
12591 if (filename == NULL)
12592 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012593 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012594 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012595 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012596 if (inputStream->filename == NULL) {
12597 xmlFreeParserCtxt(ctxt);
12598 xmlFreeParserInputBuffer(buf);
12599 return(NULL);
12600 }
12601 }
Owen Taylor3473f882001-02-23 17:55:21 +000012602 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012603 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012604 inputPush(ctxt, inputStream);
12605
William M. Brack3a1cd212005-02-11 14:35:54 +000012606 /*
12607 * If the caller didn't provide an initial 'chunk' for determining
12608 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12609 * that it can be automatically determined later
12610 */
12611 if ((size == 0) || (chunk == NULL)) {
12612 ctxt->charset = XML_CHAR_ENCODING_NONE;
12613 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012614 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12615 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012616
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012617 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012618
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012619 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012620#ifdef DEBUG_PUSH
12621 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12622#endif
12623 }
12624
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012625 if (enc != XML_CHAR_ENCODING_NONE) {
12626 xmlSwitchEncoding(ctxt, enc);
12627 }
12628
Owen Taylor3473f882001-02-23 17:55:21 +000012629 return(ctxt);
12630}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012631#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012632
12633/**
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012634 * xmlHaltParser:
12635 * @ctxt: an XML parser context
12636 *
12637 * Blocks further parser processing don't override error
12638 * for internal use
12639 */
12640static void
12641xmlHaltParser(xmlParserCtxtPtr ctxt) {
12642 if (ctxt == NULL)
12643 return;
12644 ctxt->instate = XML_PARSER_EOF;
12645 ctxt->disableSAX = 1;
12646 if (ctxt->input != NULL) {
12647 /*
12648 * in case there was a specific allocation deallocate before
12649 * overriding base
12650 */
12651 if (ctxt->input->free != NULL) {
12652 ctxt->input->free((xmlChar *) ctxt->input->base);
12653 ctxt->input->free = NULL;
12654 }
12655 ctxt->input->cur = BAD_CAST"";
12656 ctxt->input->base = ctxt->input->cur;
12657 }
12658}
12659
12660/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012661 * xmlStopParser:
12662 * @ctxt: an XML parser context
12663 *
12664 * Blocks further parser processing
12665 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012666void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012667xmlStopParser(xmlParserCtxtPtr ctxt) {
12668 if (ctxt == NULL)
12669 return;
Daniel Veillard28cd9cb2015-11-20 14:55:30 +080012670 xmlHaltParser(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012671 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012672}
12673
12674/**
Owen Taylor3473f882001-02-23 17:55:21 +000012675 * xmlCreateIOParserCtxt:
12676 * @sax: a SAX handler
12677 * @user_data: The user data returned on SAX callbacks
12678 * @ioread: an I/O read function
12679 * @ioclose: an I/O close function
12680 * @ioctx: an I/O handler
12681 * @enc: the charset encoding if known
12682 *
12683 * Create a parser context for using the XML parser with an existing
12684 * I/O stream
12685 *
12686 * Returns the new parser context or NULL
12687 */
12688xmlParserCtxtPtr
12689xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12690 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12691 void *ioctx, xmlCharEncoding enc) {
12692 xmlParserCtxtPtr ctxt;
12693 xmlParserInputPtr inputStream;
12694 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012695
Daniel Veillard42595322004-11-08 10:52:06 +000012696 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012697
12698 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012699 if (buf == NULL) {
12700 if (ioclose != NULL)
12701 ioclose(ioctx);
12702 return (NULL);
12703 }
Owen Taylor3473f882001-02-23 17:55:21 +000012704
12705 ctxt = xmlNewParserCtxt();
12706 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012707 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012708 return(NULL);
12709 }
12710 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012711#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012712 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012713#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012714 xmlFree(ctxt->sax);
12715 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12716 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012717 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012718 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012719 return(NULL);
12720 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012721 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12722 if (sax->initialized == XML_SAX2_MAGIC)
12723 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12724 else
12725 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012726 if (user_data != NULL)
12727 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012728 }
Owen Taylor3473f882001-02-23 17:55:21 +000012729
12730 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12731 if (inputStream == NULL) {
12732 xmlFreeParserCtxt(ctxt);
12733 return(NULL);
12734 }
12735 inputPush(ctxt, inputStream);
12736
12737 return(ctxt);
12738}
12739
Daniel Veillard4432df22003-09-28 18:58:27 +000012740#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012741/************************************************************************
12742 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012743 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012744 * *
12745 ************************************************************************/
12746
12747/**
12748 * xmlIOParseDTD:
12749 * @sax: the SAX handler block or NULL
12750 * @input: an Input Buffer
12751 * @enc: the charset encoding if known
12752 *
12753 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012754 *
Owen Taylor3473f882001-02-23 17:55:21 +000012755 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012756 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012757 */
12758
12759xmlDtdPtr
12760xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12761 xmlCharEncoding enc) {
12762 xmlDtdPtr ret = NULL;
12763 xmlParserCtxtPtr ctxt;
12764 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012765 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012766
12767 if (input == NULL)
12768 return(NULL);
12769
12770 ctxt = xmlNewParserCtxt();
12771 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012772 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012773 return(NULL);
12774 }
12775
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012776 /* We are loading a DTD */
12777 ctxt->options |= XML_PARSE_DTDLOAD;
12778
Owen Taylor3473f882001-02-23 17:55:21 +000012779 /*
12780 * Set-up the SAX context
12781 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012782 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012783 if (ctxt->sax != NULL)
12784 xmlFree(ctxt->sax);
12785 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012786 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012787 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012788 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012789
12790 /*
12791 * generate a parser input from the I/O handler
12792 */
12793
Daniel Veillard43caefb2003-12-07 19:32:22 +000012794 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012795 if (pinput == NULL) {
12796 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012797 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012798 xmlFreeParserCtxt(ctxt);
12799 return(NULL);
12800 }
12801
12802 /*
12803 * plug some encoding conversion routines here.
12804 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012805 if (xmlPushInput(ctxt, pinput) < 0) {
12806 if (sax != NULL) ctxt->sax = NULL;
12807 xmlFreeParserCtxt(ctxt);
12808 return(NULL);
12809 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012810 if (enc != XML_CHAR_ENCODING_NONE) {
12811 xmlSwitchEncoding(ctxt, enc);
12812 }
Owen Taylor3473f882001-02-23 17:55:21 +000012813
12814 pinput->filename = NULL;
12815 pinput->line = 1;
12816 pinput->col = 1;
12817 pinput->base = ctxt->input->cur;
12818 pinput->cur = ctxt->input->cur;
12819 pinput->free = NULL;
12820
12821 /*
12822 * let's parse that entity knowing it's an external subset.
12823 */
12824 ctxt->inSubset = 2;
12825 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012826 if (ctxt->myDoc == NULL) {
12827 xmlErrMemory(ctxt, "New Doc failed");
12828 return(NULL);
12829 }
12830 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012831 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12832 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012833
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012834 if ((enc == XML_CHAR_ENCODING_NONE) &&
12835 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012836 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012837 * Get the 4 first bytes and decode the charset
12838 * if enc != XML_CHAR_ENCODING_NONE
12839 * plug some encoding conversion routines.
12840 */
12841 start[0] = RAW;
12842 start[1] = NXT(1);
12843 start[2] = NXT(2);
12844 start[3] = NXT(3);
12845 enc = xmlDetectCharEncoding(start, 4);
12846 if (enc != XML_CHAR_ENCODING_NONE) {
12847 xmlSwitchEncoding(ctxt, enc);
12848 }
12849 }
12850
Owen Taylor3473f882001-02-23 17:55:21 +000012851 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12852
12853 if (ctxt->myDoc != NULL) {
12854 if (ctxt->wellFormed) {
12855 ret = ctxt->myDoc->extSubset;
12856 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012857 if (ret != NULL) {
12858 xmlNodePtr tmp;
12859
12860 ret->doc = NULL;
12861 tmp = ret->children;
12862 while (tmp != NULL) {
12863 tmp->doc = NULL;
12864 tmp = tmp->next;
12865 }
12866 }
Owen Taylor3473f882001-02-23 17:55:21 +000012867 } else {
12868 ret = NULL;
12869 }
12870 xmlFreeDoc(ctxt->myDoc);
12871 ctxt->myDoc = NULL;
12872 }
12873 if (sax != NULL) ctxt->sax = NULL;
12874 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012875
Owen Taylor3473f882001-02-23 17:55:21 +000012876 return(ret);
12877}
12878
12879/**
12880 * xmlSAXParseDTD:
12881 * @sax: the SAX handler block
12882 * @ExternalID: a NAME* containing the External ID of the DTD
12883 * @SystemID: a NAME* containing the URL to the DTD
12884 *
12885 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012886 *
Owen Taylor3473f882001-02-23 17:55:21 +000012887 * Returns the resulting xmlDtdPtr or NULL in case of error.
12888 */
12889
12890xmlDtdPtr
12891xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12892 const xmlChar *SystemID) {
12893 xmlDtdPtr ret = NULL;
12894 xmlParserCtxtPtr ctxt;
12895 xmlParserInputPtr input = NULL;
12896 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012897 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012898
12899 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12900
12901 ctxt = xmlNewParserCtxt();
12902 if (ctxt == NULL) {
12903 return(NULL);
12904 }
12905
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012906 /* We are loading a DTD */
12907 ctxt->options |= XML_PARSE_DTDLOAD;
12908
Owen Taylor3473f882001-02-23 17:55:21 +000012909 /*
12910 * Set-up the SAX context
12911 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012912 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012913 if (ctxt->sax != NULL)
12914 xmlFree(ctxt->sax);
12915 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012916 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012917 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012918
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012919 /*
12920 * Canonicalise the system ID
12921 */
12922 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012923 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012924 xmlFreeParserCtxt(ctxt);
12925 return(NULL);
12926 }
Owen Taylor3473f882001-02-23 17:55:21 +000012927
12928 /*
12929 * Ask the Entity resolver to load the damn thing
12930 */
12931
12932 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012933 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12934 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012935 if (input == NULL) {
12936 if (sax != NULL) ctxt->sax = NULL;
12937 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012938 if (systemIdCanonic != NULL)
12939 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012940 return(NULL);
12941 }
12942
12943 /*
12944 * plug some encoding conversion routines here.
12945 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012946 if (xmlPushInput(ctxt, input) < 0) {
12947 if (sax != NULL) ctxt->sax = NULL;
12948 xmlFreeParserCtxt(ctxt);
12949 if (systemIdCanonic != NULL)
12950 xmlFree(systemIdCanonic);
12951 return(NULL);
12952 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012953 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12954 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12955 xmlSwitchEncoding(ctxt, enc);
12956 }
Owen Taylor3473f882001-02-23 17:55:21 +000012957
12958 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012959 input->filename = (char *) systemIdCanonic;
12960 else
12961 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012962 input->line = 1;
12963 input->col = 1;
12964 input->base = ctxt->input->cur;
12965 input->cur = ctxt->input->cur;
12966 input->free = NULL;
12967
12968 /*
12969 * let's parse that entity knowing it's an external subset.
12970 */
12971 ctxt->inSubset = 2;
12972 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012973 if (ctxt->myDoc == NULL) {
12974 xmlErrMemory(ctxt, "New Doc failed");
12975 if (sax != NULL) ctxt->sax = NULL;
12976 xmlFreeParserCtxt(ctxt);
12977 return(NULL);
12978 }
12979 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012980 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12981 ExternalID, SystemID);
12982 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12983
12984 if (ctxt->myDoc != NULL) {
12985 if (ctxt->wellFormed) {
12986 ret = ctxt->myDoc->extSubset;
12987 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012988 if (ret != NULL) {
12989 xmlNodePtr tmp;
12990
12991 ret->doc = NULL;
12992 tmp = ret->children;
12993 while (tmp != NULL) {
12994 tmp->doc = NULL;
12995 tmp = tmp->next;
12996 }
12997 }
Owen Taylor3473f882001-02-23 17:55:21 +000012998 } else {
12999 ret = NULL;
13000 }
13001 xmlFreeDoc(ctxt->myDoc);
13002 ctxt->myDoc = NULL;
13003 }
13004 if (sax != NULL) ctxt->sax = NULL;
13005 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013006
Owen Taylor3473f882001-02-23 17:55:21 +000013007 return(ret);
13008}
13009
Daniel Veillard4432df22003-09-28 18:58:27 +000013010
Owen Taylor3473f882001-02-23 17:55:21 +000013011/**
13012 * xmlParseDTD:
13013 * @ExternalID: a NAME* containing the External ID of the DTD
13014 * @SystemID: a NAME* containing the URL to the DTD
13015 *
13016 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000013017 *
Owen Taylor3473f882001-02-23 17:55:21 +000013018 * Returns the resulting xmlDtdPtr or NULL in case of error.
13019 */
13020
13021xmlDtdPtr
13022xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
13023 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
13024}
Daniel Veillard4432df22003-09-28 18:58:27 +000013025#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013026
13027/************************************************************************
13028 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013029 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000013030 * *
13031 ************************************************************************/
13032
13033/**
Owen Taylor3473f882001-02-23 17:55:21 +000013034 * xmlParseCtxtExternalEntity:
13035 * @ctx: the existing parsing context
13036 * @URL: the URL for the entity to load
13037 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013038 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013039 *
13040 * Parse an external general entity within an existing parsing context
13041 * An external general parsed entity is well-formed if it matches the
13042 * production labeled extParsedEnt.
13043 *
13044 * [78] extParsedEnt ::= TextDecl? content
13045 *
13046 * Returns 0 if the entity is well formed, -1 in case of args problem and
13047 * the parser error code otherwise
13048 */
13049
13050int
13051xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013052 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000013053 xmlParserCtxtPtr ctxt;
13054 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013055 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013056 xmlSAXHandlerPtr oldsax = NULL;
13057 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013058 xmlChar start[4];
13059 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013060
Daniel Veillardce682bc2004-11-05 17:22:25 +000013061 if (ctx == NULL) return(-1);
13062
Daniel Veillard0161e632008-08-28 15:36:32 +000013063 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
13064 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013065 return(XML_ERR_ENTITY_LOOP);
13066 }
13067
Daniel Veillardcda96922001-08-21 10:56:31 +000013068 if (lst != NULL)
13069 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013070 if ((URL == NULL) && (ID == NULL))
13071 return(-1);
13072 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
13073 return(-1);
13074
Rob Richards798743a2009-06-19 13:54:25 -040013075 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000013076 if (ctxt == NULL) {
13077 return(-1);
13078 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013079
Owen Taylor3473f882001-02-23 17:55:21 +000013080 oldsax = ctxt->sax;
13081 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013082 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013083 newDoc = xmlNewDoc(BAD_CAST "1.0");
13084 if (newDoc == NULL) {
13085 xmlFreeParserCtxt(ctxt);
13086 return(-1);
13087 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013088 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013089 if (ctx->myDoc->dict) {
13090 newDoc->dict = ctx->myDoc->dict;
13091 xmlDictReference(newDoc->dict);
13092 }
Owen Taylor3473f882001-02-23 17:55:21 +000013093 if (ctx->myDoc != NULL) {
13094 newDoc->intSubset = ctx->myDoc->intSubset;
13095 newDoc->extSubset = ctx->myDoc->extSubset;
13096 }
13097 if (ctx->myDoc->URL != NULL) {
13098 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
13099 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013100 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13101 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013102 ctxt->sax = oldsax;
13103 xmlFreeParserCtxt(ctxt);
13104 newDoc->intSubset = NULL;
13105 newDoc->extSubset = NULL;
13106 xmlFreeDoc(newDoc);
13107 return(-1);
13108 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013109 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013110 nodePush(ctxt, newDoc->children);
13111 if (ctx->myDoc == NULL) {
13112 ctxt->myDoc = newDoc;
13113 } else {
13114 ctxt->myDoc = ctx->myDoc;
13115 newDoc->children->doc = ctx->myDoc;
13116 }
13117
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013118 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013119 * Get the 4 first bytes and decode the charset
13120 * if enc != XML_CHAR_ENCODING_NONE
13121 * plug some encoding conversion routines.
13122 */
13123 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013124 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13125 start[0] = RAW;
13126 start[1] = NXT(1);
13127 start[2] = NXT(2);
13128 start[3] = NXT(3);
13129 enc = xmlDetectCharEncoding(start, 4);
13130 if (enc != XML_CHAR_ENCODING_NONE) {
13131 xmlSwitchEncoding(ctxt, enc);
13132 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013133 }
13134
Owen Taylor3473f882001-02-23 17:55:21 +000013135 /*
13136 * Parse a possible text declaration first
13137 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013138 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013139 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013140 /*
13141 * An XML-1.0 document can't reference an entity not XML-1.0
13142 */
13143 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13144 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013145 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013146 "Version mismatch between document and entity\n");
13147 }
Owen Taylor3473f882001-02-23 17:55:21 +000013148 }
13149
13150 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013151 * If the user provided its own SAX callbacks then reuse the
13152 * useData callback field, otherwise the expected setup in a
13153 * DOM builder is to have userData == ctxt
13154 */
13155 if (ctx->userData == ctx)
13156 ctxt->userData = ctxt;
13157 else
13158 ctxt->userData = ctx->userData;
13159
13160 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013161 * Doing validity checking on chunk doesn't make sense
13162 */
13163 ctxt->instate = XML_PARSER_CONTENT;
13164 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013165 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013166 ctxt->loadsubset = ctx->loadsubset;
13167 ctxt->depth = ctx->depth + 1;
13168 ctxt->replaceEntities = ctx->replaceEntities;
13169 if (ctxt->validate) {
13170 ctxt->vctxt.error = ctx->vctxt.error;
13171 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013172 } else {
13173 ctxt->vctxt.error = NULL;
13174 ctxt->vctxt.warning = NULL;
13175 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013176 ctxt->vctxt.nodeTab = NULL;
13177 ctxt->vctxt.nodeNr = 0;
13178 ctxt->vctxt.nodeMax = 0;
13179 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013180 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13181 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013182 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13183 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13184 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013185 ctxt->dictNames = ctx->dictNames;
13186 ctxt->attsDefault = ctx->attsDefault;
13187 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013188 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013189
13190 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013191
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013192 ctx->validate = ctxt->validate;
13193 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013194 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013195 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013196 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013197 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013198 }
13199 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013200 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013201 }
13202
13203 if (!ctxt->wellFormed) {
13204 if (ctxt->errNo == 0)
13205 ret = 1;
13206 else
13207 ret = ctxt->errNo;
13208 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013209 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013210 xmlNodePtr cur;
13211
13212 /*
13213 * Return the newly created nodeset after unlinking it from
13214 * they pseudo parent.
13215 */
13216 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013217 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013218 while (cur != NULL) {
13219 cur->parent = NULL;
13220 cur = cur->next;
13221 }
13222 newDoc->children->children = NULL;
13223 }
13224 ret = 0;
13225 }
13226 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013227 ctxt->dict = NULL;
13228 ctxt->attsDefault = NULL;
13229 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013230 xmlFreeParserCtxt(ctxt);
13231 newDoc->intSubset = NULL;
13232 newDoc->extSubset = NULL;
13233 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013234
Owen Taylor3473f882001-02-23 17:55:21 +000013235 return(ret);
13236}
13237
13238/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013239 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013240 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013241 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013242 * @sax: the SAX handler bloc (possibly NULL)
13243 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13244 * @depth: Used for loop detection, use 0
13245 * @URL: the URL for the entity to load
13246 * @ID: the System ID for the entity to load
13247 * @list: the return value for the set of parsed nodes
13248 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013249 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013250 *
13251 * Returns 0 if the entity is well formed, -1 in case of args problem and
13252 * the parser error code otherwise
13253 */
13254
Daniel Veillard7d515752003-09-26 19:12:37 +000013255static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013256xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13257 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013258 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013259 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013260 xmlParserCtxtPtr ctxt;
13261 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013262 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013263 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013264 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013265 xmlChar start[4];
13266 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013267
Daniel Veillard0161e632008-08-28 15:36:32 +000013268 if (((depth > 40) &&
13269 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13270 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013271 return(XML_ERR_ENTITY_LOOP);
13272 }
13273
Owen Taylor3473f882001-02-23 17:55:21 +000013274 if (list != NULL)
13275 *list = NULL;
13276 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013277 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013278 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013279 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013280
13281
Rob Richards9c0aa472009-03-26 18:10:19 +000013282 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013283 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013284 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013285 if (oldctxt != NULL) {
13286 ctxt->_private = oldctxt->_private;
13287 ctxt->loadsubset = oldctxt->loadsubset;
13288 ctxt->validate = oldctxt->validate;
13289 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013290 ctxt->record_info = oldctxt->record_info;
13291 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13292 ctxt->node_seq.length = oldctxt->node_seq.length;
13293 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013294 } else {
13295 /*
13296 * Doing validity checking on chunk without context
13297 * doesn't make sense
13298 */
13299 ctxt->_private = NULL;
13300 ctxt->validate = 0;
13301 ctxt->external = 2;
13302 ctxt->loadsubset = 0;
13303 }
Owen Taylor3473f882001-02-23 17:55:21 +000013304 if (sax != NULL) {
13305 oldsax = ctxt->sax;
13306 ctxt->sax = sax;
13307 if (user_data != NULL)
13308 ctxt->userData = user_data;
13309 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013310 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013311 newDoc = xmlNewDoc(BAD_CAST "1.0");
13312 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013313 ctxt->node_seq.maximum = 0;
13314 ctxt->node_seq.length = 0;
13315 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013316 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013317 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013318 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013319 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013320 newDoc->intSubset = doc->intSubset;
13321 newDoc->extSubset = doc->extSubset;
13322 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013323 xmlDictReference(newDoc->dict);
13324
Owen Taylor3473f882001-02-23 17:55:21 +000013325 if (doc->URL != NULL) {
13326 newDoc->URL = xmlStrdup(doc->URL);
13327 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013328 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13329 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013330 if (sax != NULL)
13331 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013332 ctxt->node_seq.maximum = 0;
13333 ctxt->node_seq.length = 0;
13334 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013335 xmlFreeParserCtxt(ctxt);
13336 newDoc->intSubset = NULL;
13337 newDoc->extSubset = NULL;
13338 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013339 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013340 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013341 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013342 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013343 ctxt->myDoc = doc;
13344 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013345
Daniel Veillard0161e632008-08-28 15:36:32 +000013346 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013347 * Get the 4 first bytes and decode the charset
13348 * if enc != XML_CHAR_ENCODING_NONE
13349 * plug some encoding conversion routines.
13350 */
13351 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013352 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13353 start[0] = RAW;
13354 start[1] = NXT(1);
13355 start[2] = NXT(2);
13356 start[3] = NXT(3);
13357 enc = xmlDetectCharEncoding(start, 4);
13358 if (enc != XML_CHAR_ENCODING_NONE) {
13359 xmlSwitchEncoding(ctxt, enc);
13360 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013361 }
13362
Owen Taylor3473f882001-02-23 17:55:21 +000013363 /*
13364 * Parse a possible text declaration first
13365 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013366 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013367 xmlParseTextDecl(ctxt);
13368 }
13369
Owen Taylor3473f882001-02-23 17:55:21 +000013370 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013371 ctxt->depth = depth;
13372
13373 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013374
Daniel Veillard561b7f82002-03-20 21:55:57 +000013375 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013376 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013377 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013378 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013379 }
13380 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013381 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013382 }
13383
13384 if (!ctxt->wellFormed) {
13385 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013386 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013387 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013388 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013389 } else {
13390 if (list != NULL) {
13391 xmlNodePtr cur;
13392
13393 /*
13394 * Return the newly created nodeset after unlinking it from
13395 * they pseudo parent.
13396 */
13397 cur = newDoc->children->children;
13398 *list = cur;
13399 while (cur != NULL) {
13400 cur->parent = NULL;
13401 cur = cur->next;
13402 }
13403 newDoc->children->children = NULL;
13404 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013405 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013406 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013407
13408 /*
13409 * Record in the parent context the number of entities replacement
13410 * done when parsing that reference.
13411 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013412 if (oldctxt != NULL)
13413 oldctxt->nbentities += ctxt->nbentities;
13414
Daniel Veillard0161e632008-08-28 15:36:32 +000013415 /*
13416 * Also record the size of the entity parsed
13417 */
Gaurav Guptacf77e602015-09-30 14:46:29 +020013418 if (ctxt->input != NULL && oldctxt != NULL) {
Daniel Veillard0161e632008-08-28 15:36:32 +000013419 oldctxt->sizeentities += ctxt->input->consumed;
13420 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13421 }
13422 /*
13423 * And record the last error if any
13424 */
13425 if (ctxt->lastError.code != XML_ERR_OK)
13426 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13427
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013428 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013429 ctxt->sax = oldsax;
Gaurav Guptacf77e602015-09-30 14:46:29 +020013430 if (oldctxt != NULL) {
13431 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13432 oldctxt->node_seq.length = ctxt->node_seq.length;
13433 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13434 }
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013435 ctxt->node_seq.maximum = 0;
13436 ctxt->node_seq.length = 0;
13437 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013438 xmlFreeParserCtxt(ctxt);
13439 newDoc->intSubset = NULL;
13440 newDoc->extSubset = NULL;
13441 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013442
Owen Taylor3473f882001-02-23 17:55:21 +000013443 return(ret);
13444}
13445
Daniel Veillard81273902003-09-30 00:43:48 +000013446#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013447/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013448 * xmlParseExternalEntity:
13449 * @doc: the document the chunk pertains to
13450 * @sax: the SAX handler bloc (possibly NULL)
13451 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13452 * @depth: Used for loop detection, use 0
13453 * @URL: the URL for the entity to load
13454 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013455 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013456 *
13457 * Parse an external general entity
13458 * An external general parsed entity is well-formed if it matches the
13459 * production labeled extParsedEnt.
13460 *
13461 * [78] extParsedEnt ::= TextDecl? content
13462 *
13463 * Returns 0 if the entity is well formed, -1 in case of args problem and
13464 * the parser error code otherwise
13465 */
13466
13467int
13468xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013469 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013470 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013471 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013472}
13473
13474/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013475 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013476 * @doc: the document the chunk pertains to
13477 * @sax: the SAX handler bloc (possibly NULL)
13478 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13479 * @depth: Used for loop detection, use 0
13480 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013481 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013482 *
13483 * Parse a well-balanced chunk of an XML document
13484 * called by the parser
13485 * The allowed sequence for the Well Balanced Chunk is the one defined by
13486 * the content production in the XML grammar:
13487 *
13488 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13489 *
13490 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13491 * the parser error code otherwise
13492 */
13493
13494int
13495xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013496 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013497 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13498 depth, string, lst, 0 );
13499}
Daniel Veillard81273902003-09-30 00:43:48 +000013500#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013501
13502/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013503 * xmlParseBalancedChunkMemoryInternal:
13504 * @oldctxt: the existing parsing context
13505 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13506 * @user_data: the user data field for the parser context
13507 * @lst: the return value for the set of parsed nodes
13508 *
13509 *
13510 * Parse a well-balanced chunk of an XML document
13511 * called by the parser
13512 * The allowed sequence for the Well Balanced Chunk is the one defined by
13513 * the content production in the XML grammar:
13514 *
13515 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13516 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013517 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13518 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013519 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013520 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013521 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013522 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013523static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013524xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13525 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13526 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013527 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013528 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013529 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013530 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013531 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013532 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013533 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013534#ifdef SAX2
13535 int i;
13536#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013537
Daniel Veillard0161e632008-08-28 15:36:32 +000013538 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13539 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013540 return(XML_ERR_ENTITY_LOOP);
13541 }
13542
13543
13544 if (lst != NULL)
13545 *lst = NULL;
13546 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013547 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013548
13549 size = xmlStrlen(string);
13550
13551 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013552 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013553 if (user_data != NULL)
13554 ctxt->userData = user_data;
13555 else
13556 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013557 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13558 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013559 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13560 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13561 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013562
Daniel Veillard74eaec12009-08-26 15:57:20 +020013563#ifdef SAX2
13564 /* propagate namespaces down the entity */
13565 for (i = 0;i < oldctxt->nsNr;i += 2) {
13566 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13567 }
13568#endif
13569
Daniel Veillard328f48c2002-11-15 15:24:34 +000013570 oldsax = ctxt->sax;
13571 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013572 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013573 ctxt->replaceEntities = oldctxt->replaceEntities;
13574 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013575
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013576 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013577 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013578 newDoc = xmlNewDoc(BAD_CAST "1.0");
13579 if (newDoc == NULL) {
13580 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013581 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013582 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013583 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013584 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013585 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013586 newDoc->dict = ctxt->dict;
13587 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013588 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013589 } else {
13590 ctxt->myDoc = oldctxt->myDoc;
13591 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013592 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013593 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013594 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13595 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013596 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013597 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013598 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013599 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013600 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013601 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013602 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013603 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013604 ctxt->myDoc->children = NULL;
13605 ctxt->myDoc->last = NULL;
13606 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013607 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013608 ctxt->instate = XML_PARSER_CONTENT;
13609 ctxt->depth = oldctxt->depth + 1;
13610
Daniel Veillard328f48c2002-11-15 15:24:34 +000013611 ctxt->validate = 0;
13612 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013613 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13614 /*
13615 * ID/IDREF registration will be done in xmlValidateElement below
13616 */
13617 ctxt->loadsubset |= XML_SKIP_IDS;
13618 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013619 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013620 ctxt->attsDefault = oldctxt->attsDefault;
13621 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013622
Daniel Veillard68e9e742002-11-16 15:35:11 +000013623 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013624 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013625 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013626 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013627 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013628 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013629 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013630 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013631 }
13632
13633 if (!ctxt->wellFormed) {
13634 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013635 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013636 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013637 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013638 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013639 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013640 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013641
William M. Brack7b9154b2003-09-27 19:23:50 +000013642 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013643 xmlNodePtr cur;
13644
13645 /*
13646 * Return the newly created nodeset after unlinking it from
13647 * they pseudo parent.
13648 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013649 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013650 *lst = cur;
13651 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013652#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013653 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13654 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13655 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013656 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13657 oldctxt->myDoc, cur);
13658 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013659#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013660 cur->parent = NULL;
13661 cur = cur->next;
13662 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013663 ctxt->myDoc->children->children = NULL;
13664 }
13665 if (ctxt->myDoc != NULL) {
13666 xmlFreeNode(ctxt->myDoc->children);
13667 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013668 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013669 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013670
13671 /*
13672 * Record in the parent context the number of entities replacement
13673 * done when parsing that reference.
13674 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013675 if (oldctxt != NULL)
13676 oldctxt->nbentities += ctxt->nbentities;
13677
Daniel Veillard0161e632008-08-28 15:36:32 +000013678 /*
13679 * Also record the last error if any
13680 */
13681 if (ctxt->lastError.code != XML_ERR_OK)
13682 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13683
Daniel Veillard328f48c2002-11-15 15:24:34 +000013684 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013685 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013686 ctxt->attsDefault = NULL;
13687 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013688 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013689 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013690 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013691 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013692
Daniel Veillard328f48c2002-11-15 15:24:34 +000013693 return(ret);
13694}
13695
Daniel Veillard29b17482004-08-16 00:39:03 +000013696/**
13697 * xmlParseInNodeContext:
13698 * @node: the context node
13699 * @data: the input string
13700 * @datalen: the input string length in bytes
13701 * @options: a combination of xmlParserOption
13702 * @lst: the return value for the set of parsed nodes
13703 *
13704 * Parse a well-balanced chunk of an XML document
13705 * within the context (DTD, namespaces, etc ...) of the given node.
13706 *
13707 * The allowed sequence for the data is a Well Balanced Chunk defined by
13708 * the content production in the XML grammar:
13709 *
13710 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13711 *
13712 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13713 * error code otherwise
13714 */
13715xmlParserErrors
13716xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13717 int options, xmlNodePtr *lst) {
13718#ifdef SAX2
13719 xmlParserCtxtPtr ctxt;
13720 xmlDocPtr doc = NULL;
13721 xmlNodePtr fake, cur;
13722 int nsnr = 0;
13723
13724 xmlParserErrors ret = XML_ERR_OK;
13725
13726 /*
13727 * check all input parameters, grab the document
13728 */
13729 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13730 return(XML_ERR_INTERNAL_ERROR);
13731 switch (node->type) {
13732 case XML_ELEMENT_NODE:
13733 case XML_ATTRIBUTE_NODE:
13734 case XML_TEXT_NODE:
13735 case XML_CDATA_SECTION_NODE:
13736 case XML_ENTITY_REF_NODE:
13737 case XML_PI_NODE:
13738 case XML_COMMENT_NODE:
13739 case XML_DOCUMENT_NODE:
13740 case XML_HTML_DOCUMENT_NODE:
13741 break;
13742 default:
13743 return(XML_ERR_INTERNAL_ERROR);
13744
13745 }
13746 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13747 (node->type != XML_DOCUMENT_NODE) &&
13748 (node->type != XML_HTML_DOCUMENT_NODE))
13749 node = node->parent;
13750 if (node == NULL)
13751 return(XML_ERR_INTERNAL_ERROR);
13752 if (node->type == XML_ELEMENT_NODE)
13753 doc = node->doc;
13754 else
13755 doc = (xmlDocPtr) node;
13756 if (doc == NULL)
13757 return(XML_ERR_INTERNAL_ERROR);
13758
13759 /*
13760 * allocate a context and set-up everything not related to the
13761 * node position in the tree
13762 */
13763 if (doc->type == XML_DOCUMENT_NODE)
13764 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13765#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013766 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013767 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013768 /*
13769 * When parsing in context, it makes no sense to add implied
13770 * elements like html/body/etc...
13771 */
13772 options |= HTML_PARSE_NOIMPLIED;
13773 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013774#endif
13775 else
13776 return(XML_ERR_INTERNAL_ERROR);
13777
13778 if (ctxt == NULL)
13779 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013780
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013781 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013782 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13783 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13784 * we must wait until the last moment to free the original one.
13785 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013786 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013787 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013788 xmlDictFree(ctxt->dict);
13789 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013790 } else
13791 options |= XML_PARSE_NODICT;
13792
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013793 if (doc->encoding != NULL) {
13794 xmlCharEncodingHandlerPtr hdlr;
13795
13796 if (ctxt->encoding != NULL)
13797 xmlFree((xmlChar *) ctxt->encoding);
13798 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13799
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013800 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013801 if (hdlr != NULL) {
13802 xmlSwitchToEncoding(ctxt, hdlr);
13803 } else {
13804 return(XML_ERR_UNSUPPORTED_ENCODING);
13805 }
13806 }
13807
Daniel Veillard37334572008-07-31 08:20:02 +000013808 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013809 xmlDetectSAX2(ctxt);
13810 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013811 /* parsing in context, i.e. as within existing content */
13812 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013813
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013814 fake = xmlNewComment(NULL);
13815 if (fake == NULL) {
13816 xmlFreeParserCtxt(ctxt);
13817 return(XML_ERR_NO_MEMORY);
13818 }
13819 xmlAddChild(node, fake);
13820
Daniel Veillard29b17482004-08-16 00:39:03 +000013821 if (node->type == XML_ELEMENT_NODE) {
13822 nodePush(ctxt, node);
13823 /*
13824 * initialize the SAX2 namespaces stack
13825 */
13826 cur = node;
13827 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13828 xmlNsPtr ns = cur->nsDef;
13829 const xmlChar *iprefix, *ihref;
13830
13831 while (ns != NULL) {
13832 if (ctxt->dict) {
13833 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13834 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13835 } else {
13836 iprefix = ns->prefix;
13837 ihref = ns->href;
13838 }
13839
13840 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13841 nsPush(ctxt, iprefix, ihref);
13842 nsnr++;
13843 }
13844 ns = ns->next;
13845 }
13846 cur = cur->parent;
13847 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013848 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013849
13850 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13851 /*
13852 * ID/IDREF registration will be done in xmlValidateElement below
13853 */
13854 ctxt->loadsubset |= XML_SKIP_IDS;
13855 }
13856
Daniel Veillard499cc922006-01-18 17:22:35 +000013857#ifdef LIBXML_HTML_ENABLED
13858 if (doc->type == XML_HTML_DOCUMENT_NODE)
13859 __htmlParseContent(ctxt);
13860 else
13861#endif
13862 xmlParseContent(ctxt);
13863
Daniel Veillard29b17482004-08-16 00:39:03 +000013864 nsPop(ctxt, nsnr);
13865 if ((RAW == '<') && (NXT(1) == '/')) {
13866 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13867 } else if (RAW != 0) {
13868 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13869 }
13870 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13871 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13872 ctxt->wellFormed = 0;
13873 }
13874
13875 if (!ctxt->wellFormed) {
13876 if (ctxt->errNo == 0)
13877 ret = XML_ERR_INTERNAL_ERROR;
13878 else
13879 ret = (xmlParserErrors)ctxt->errNo;
13880 } else {
13881 ret = XML_ERR_OK;
13882 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013883
Daniel Veillard29b17482004-08-16 00:39:03 +000013884 /*
13885 * Return the newly created nodeset after unlinking it from
13886 * the pseudo sibling.
13887 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013888
Daniel Veillard29b17482004-08-16 00:39:03 +000013889 cur = fake->next;
13890 fake->next = NULL;
13891 node->last = fake;
13892
13893 if (cur != NULL) {
13894 cur->prev = NULL;
13895 }
13896
13897 *lst = cur;
13898
13899 while (cur != NULL) {
13900 cur->parent = NULL;
13901 cur = cur->next;
13902 }
13903
13904 xmlUnlinkNode(fake);
13905 xmlFreeNode(fake);
13906
13907
13908 if (ret != XML_ERR_OK) {
13909 xmlFreeNodeList(*lst);
13910 *lst = NULL;
13911 }
William M. Brackc3f81342004-10-03 01:22:44 +000013912
William M. Brackb7b54de2004-10-06 16:38:01 +000013913 if (doc->dict != NULL)
13914 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013915 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013916
Daniel Veillard29b17482004-08-16 00:39:03 +000013917 return(ret);
13918#else /* !SAX2 */
13919 return(XML_ERR_INTERNAL_ERROR);
13920#endif
13921}
13922
Daniel Veillard81273902003-09-30 00:43:48 +000013923#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013924/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013925 * xmlParseBalancedChunkMemoryRecover:
13926 * @doc: the document the chunk pertains to
13927 * @sax: the SAX handler bloc (possibly NULL)
13928 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13929 * @depth: Used for loop detection, use 0
13930 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13931 * @lst: the return value for the set of parsed nodes
13932 * @recover: return nodes even if the data is broken (use 0)
13933 *
13934 *
13935 * Parse a well-balanced chunk of an XML document
13936 * called by the parser
13937 * The allowed sequence for the Well Balanced Chunk is the one defined by
13938 * the content production in the XML grammar:
13939 *
13940 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13941 *
13942 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13943 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013944 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013945 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013946 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13947 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013948 */
13949int
13950xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013951 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013952 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013953 xmlParserCtxtPtr ctxt;
13954 xmlDocPtr newDoc;
13955 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013956 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013957 int size;
13958 int ret = 0;
13959
Daniel Veillard0161e632008-08-28 15:36:32 +000013960 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013961 return(XML_ERR_ENTITY_LOOP);
13962 }
13963
13964
Daniel Veillardcda96922001-08-21 10:56:31 +000013965 if (lst != NULL)
13966 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013967 if (string == NULL)
13968 return(-1);
13969
13970 size = xmlStrlen(string);
13971
13972 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13973 if (ctxt == NULL) return(-1);
13974 ctxt->userData = ctxt;
13975 if (sax != NULL) {
13976 oldsax = ctxt->sax;
13977 ctxt->sax = sax;
13978 if (user_data != NULL)
13979 ctxt->userData = user_data;
13980 }
13981 newDoc = xmlNewDoc(BAD_CAST "1.0");
13982 if (newDoc == NULL) {
13983 xmlFreeParserCtxt(ctxt);
13984 return(-1);
13985 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013986 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013987 if ((doc != NULL) && (doc->dict != NULL)) {
13988 xmlDictFree(ctxt->dict);
13989 ctxt->dict = doc->dict;
13990 xmlDictReference(ctxt->dict);
13991 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13992 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13993 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13994 ctxt->dictNames = 1;
13995 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013996 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013997 }
Owen Taylor3473f882001-02-23 17:55:21 +000013998 if (doc != NULL) {
13999 newDoc->intSubset = doc->intSubset;
14000 newDoc->extSubset = doc->extSubset;
14001 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014002 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
14003 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000014004 if (sax != NULL)
14005 ctxt->sax = oldsax;
14006 xmlFreeParserCtxt(ctxt);
14007 newDoc->intSubset = NULL;
14008 newDoc->extSubset = NULL;
14009 xmlFreeDoc(newDoc);
14010 return(-1);
14011 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014012 xmlAddChild((xmlNodePtr) newDoc, newRoot);
14013 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000014014 if (doc == NULL) {
14015 ctxt->myDoc = newDoc;
14016 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000014017 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000014018 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000014019 /* Ensure that doc has XML spec namespace */
14020 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
14021 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000014022 }
14023 ctxt->instate = XML_PARSER_CONTENT;
14024 ctxt->depth = depth;
14025
14026 /*
14027 * Doing validity checking on chunk doesn't make sense
14028 */
14029 ctxt->validate = 0;
14030 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014031 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014032
Daniel Veillardb39bc392002-10-26 19:29:51 +000014033 if ( doc != NULL ){
14034 content = doc->children;
14035 doc->children = NULL;
14036 xmlParseContent(ctxt);
14037 doc->children = content;
14038 }
14039 else {
14040 xmlParseContent(ctxt);
14041 }
Owen Taylor3473f882001-02-23 17:55:21 +000014042 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014043 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014044 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014045 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014046 }
14047 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000014048 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014049 }
14050
14051 if (!ctxt->wellFormed) {
14052 if (ctxt->errNo == 0)
14053 ret = 1;
14054 else
14055 ret = ctxt->errNo;
14056 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000014057 ret = 0;
14058 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014059
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014060 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
14061 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000014062
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014063 /*
14064 * Return the newly created nodeset after unlinking it from
14065 * they pseudo parent.
14066 */
14067 cur = newDoc->children->children;
14068 *lst = cur;
14069 while (cur != NULL) {
14070 xmlSetTreeDoc(cur, doc);
14071 cur->parent = NULL;
14072 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000014073 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000014074 newDoc->children->children = NULL;
14075 }
Daniel Veillard0161e632008-08-28 15:36:32 +000014076
14077 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000014078 ctxt->sax = oldsax;
14079 xmlFreeParserCtxt(ctxt);
14080 newDoc->intSubset = NULL;
14081 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000014082 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014083 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000014084
Owen Taylor3473f882001-02-23 17:55:21 +000014085 return(ret);
14086}
14087
14088/**
14089 * xmlSAXParseEntity:
14090 * @sax: the SAX handler block
14091 * @filename: the filename
14092 *
14093 * parse an XML external entity out of context and build a tree.
14094 * It use the given SAX function block to handle the parsing callback.
14095 * If sax is NULL, fallback to the default DOM tree building routines.
14096 *
14097 * [78] extParsedEnt ::= TextDecl? content
14098 *
14099 * This correspond to a "Well Balanced" chunk
14100 *
14101 * Returns the resulting document tree
14102 */
14103
14104xmlDocPtr
14105xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
14106 xmlDocPtr ret;
14107 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014108
14109 ctxt = xmlCreateFileParserCtxt(filename);
14110 if (ctxt == NULL) {
14111 return(NULL);
14112 }
14113 if (sax != NULL) {
14114 if (ctxt->sax != NULL)
14115 xmlFree(ctxt->sax);
14116 ctxt->sax = sax;
14117 ctxt->userData = NULL;
14118 }
14119
Owen Taylor3473f882001-02-23 17:55:21 +000014120 xmlParseExtParsedEnt(ctxt);
14121
14122 if (ctxt->wellFormed)
14123 ret = ctxt->myDoc;
14124 else {
14125 ret = NULL;
14126 xmlFreeDoc(ctxt->myDoc);
14127 ctxt->myDoc = NULL;
14128 }
14129 if (sax != NULL)
14130 ctxt->sax = NULL;
14131 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000014132
Owen Taylor3473f882001-02-23 17:55:21 +000014133 return(ret);
14134}
14135
14136/**
14137 * xmlParseEntity:
14138 * @filename: the filename
14139 *
14140 * parse an XML external entity out of context and build a tree.
14141 *
14142 * [78] extParsedEnt ::= TextDecl? content
14143 *
14144 * This correspond to a "Well Balanced" chunk
14145 *
14146 * Returns the resulting document tree
14147 */
14148
14149xmlDocPtr
14150xmlParseEntity(const char *filename) {
14151 return(xmlSAXParseEntity(NULL, filename));
14152}
Daniel Veillard81273902003-09-30 00:43:48 +000014153#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014154
14155/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014156 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014157 * @URL: the entity URL
14158 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014159 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014160 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014161 *
14162 * Create a parser context for an external entity
14163 * Automatic support for ZLIB/Compress compressed document is provided
14164 * by default if found at compile-time.
14165 *
14166 * Returns the new parser context or NULL
14167 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014168static xmlParserCtxtPtr
14169xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14170 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014171 xmlParserCtxtPtr ctxt;
14172 xmlParserInputPtr inputStream;
14173 char *directory = NULL;
14174 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014175
Owen Taylor3473f882001-02-23 17:55:21 +000014176 ctxt = xmlNewParserCtxt();
14177 if (ctxt == NULL) {
14178 return(NULL);
14179 }
14180
Daniel Veillard48247b42009-07-10 16:12:46 +020014181 if (pctx != NULL) {
14182 ctxt->options = pctx->options;
14183 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014184 }
14185
Owen Taylor3473f882001-02-23 17:55:21 +000014186 uri = xmlBuildURI(URL, base);
14187
14188 if (uri == NULL) {
14189 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14190 if (inputStream == NULL) {
14191 xmlFreeParserCtxt(ctxt);
14192 return(NULL);
14193 }
14194
14195 inputPush(ctxt, inputStream);
14196
14197 if ((ctxt->directory == NULL) && (directory == NULL))
14198 directory = xmlParserGetDirectory((char *)URL);
14199 if ((ctxt->directory == NULL) && (directory != NULL))
14200 ctxt->directory = directory;
14201 } else {
14202 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14203 if (inputStream == NULL) {
14204 xmlFree(uri);
14205 xmlFreeParserCtxt(ctxt);
14206 return(NULL);
14207 }
14208
14209 inputPush(ctxt, inputStream);
14210
14211 if ((ctxt->directory == NULL) && (directory == NULL))
14212 directory = xmlParserGetDirectory((char *)uri);
14213 if ((ctxt->directory == NULL) && (directory != NULL))
14214 ctxt->directory = directory;
14215 xmlFree(uri);
14216 }
Owen Taylor3473f882001-02-23 17:55:21 +000014217 return(ctxt);
14218}
14219
Rob Richards9c0aa472009-03-26 18:10:19 +000014220/**
14221 * xmlCreateEntityParserCtxt:
14222 * @URL: the entity URL
14223 * @ID: the entity PUBLIC ID
14224 * @base: a possible base for the target URI
14225 *
14226 * Create a parser context for an external entity
14227 * Automatic support for ZLIB/Compress compressed document is provided
14228 * by default if found at compile-time.
14229 *
14230 * Returns the new parser context or NULL
14231 */
14232xmlParserCtxtPtr
14233xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14234 const xmlChar *base) {
14235 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14236
14237}
14238
Owen Taylor3473f882001-02-23 17:55:21 +000014239/************************************************************************
14240 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014241 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014242 * *
14243 ************************************************************************/
14244
14245/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014246 * xmlCreateURLParserCtxt:
14247 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014248 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014249 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014250 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014251 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014252 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014253 *
14254 * Returns the new parser context or NULL
14255 */
14256xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014257xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014258{
14259 xmlParserCtxtPtr ctxt;
14260 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014261 char *directory = NULL;
14262
Owen Taylor3473f882001-02-23 17:55:21 +000014263 ctxt = xmlNewParserCtxt();
14264 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014265 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014266 return(NULL);
14267 }
14268
Daniel Veillarddf292f72005-01-16 19:00:15 +000014269 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014270 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014271 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014272
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014273 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014274 if (inputStream == NULL) {
14275 xmlFreeParserCtxt(ctxt);
14276 return(NULL);
14277 }
14278
Owen Taylor3473f882001-02-23 17:55:21 +000014279 inputPush(ctxt, inputStream);
14280 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014281 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014282 if ((ctxt->directory == NULL) && (directory != NULL))
14283 ctxt->directory = directory;
14284
14285 return(ctxt);
14286}
14287
Daniel Veillard61b93382003-11-03 14:28:31 +000014288/**
14289 * xmlCreateFileParserCtxt:
14290 * @filename: the filename
14291 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014292 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014293 * Automatic support for ZLIB/Compress compressed document is provided
14294 * by default if found at compile-time.
14295 *
14296 * Returns the new parser context or NULL
14297 */
14298xmlParserCtxtPtr
14299xmlCreateFileParserCtxt(const char *filename)
14300{
14301 return(xmlCreateURLParserCtxt(filename, 0));
14302}
14303
Daniel Veillard81273902003-09-30 00:43:48 +000014304#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014305/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014306 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014307 * @sax: the SAX handler block
14308 * @filename: the filename
14309 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14310 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014311 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014312 *
14313 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14314 * compressed document is provided by default if found at compile-time.
14315 * It use the given SAX function block to handle the parsing callback.
14316 * If sax is NULL, fallback to the default DOM tree building routines.
14317 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014318 * User data (void *) is stored within the parser context in the
14319 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014320 *
Owen Taylor3473f882001-02-23 17:55:21 +000014321 * Returns the resulting document tree
14322 */
14323
14324xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014325xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14326 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014327 xmlDocPtr ret;
14328 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014329
Daniel Veillard635ef722001-10-29 11:48:19 +000014330 xmlInitParser();
14331
Owen Taylor3473f882001-02-23 17:55:21 +000014332 ctxt = xmlCreateFileParserCtxt(filename);
14333 if (ctxt == NULL) {
14334 return(NULL);
14335 }
14336 if (sax != NULL) {
14337 if (ctxt->sax != NULL)
14338 xmlFree(ctxt->sax);
14339 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014340 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014341 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014342 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014343 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014344 }
Owen Taylor3473f882001-02-23 17:55:21 +000014345
Daniel Veillard37d2d162008-03-14 10:54:00 +000014346 if (ctxt->directory == NULL)
14347 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014348
Daniel Veillarddad3f682002-11-17 16:47:27 +000014349 ctxt->recovery = recovery;
14350
Owen Taylor3473f882001-02-23 17:55:21 +000014351 xmlParseDocument(ctxt);
14352
William M. Brackc07329e2003-09-08 01:57:30 +000014353 if ((ctxt->wellFormed) || recovery) {
14354 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014355 if (ret != NULL) {
14356 if (ctxt->input->buf->compressed > 0)
14357 ret->compression = 9;
14358 else
14359 ret->compression = ctxt->input->buf->compressed;
14360 }
William M. Brackc07329e2003-09-08 01:57:30 +000014361 }
Owen Taylor3473f882001-02-23 17:55:21 +000014362 else {
14363 ret = NULL;
14364 xmlFreeDoc(ctxt->myDoc);
14365 ctxt->myDoc = NULL;
14366 }
14367 if (sax != NULL)
14368 ctxt->sax = NULL;
14369 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014370
Owen Taylor3473f882001-02-23 17:55:21 +000014371 return(ret);
14372}
14373
14374/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014375 * xmlSAXParseFile:
14376 * @sax: the SAX handler block
14377 * @filename: the filename
14378 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14379 * documents
14380 *
14381 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14382 * compressed document is provided by default if found at compile-time.
14383 * It use the given SAX function block to handle the parsing callback.
14384 * If sax is NULL, fallback to the default DOM tree building routines.
14385 *
14386 * Returns the resulting document tree
14387 */
14388
14389xmlDocPtr
14390xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14391 int recovery) {
14392 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14393}
14394
14395/**
Owen Taylor3473f882001-02-23 17:55:21 +000014396 * xmlRecoverDoc:
14397 * @cur: a pointer to an array of xmlChar
14398 *
14399 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014400 * In the case the document is not Well Formed, a attempt to build a
14401 * tree is tried anyway
14402 *
14403 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014404 */
14405
14406xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014407xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014408 return(xmlSAXParseDoc(NULL, cur, 1));
14409}
14410
14411/**
14412 * xmlParseFile:
14413 * @filename: the filename
14414 *
14415 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14416 * compressed document is provided by default if found at compile-time.
14417 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014418 * Returns the resulting document tree if the file was wellformed,
14419 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014420 */
14421
14422xmlDocPtr
14423xmlParseFile(const char *filename) {
14424 return(xmlSAXParseFile(NULL, filename, 0));
14425}
14426
14427/**
14428 * xmlRecoverFile:
14429 * @filename: the filename
14430 *
14431 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14432 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014433 * In the case the document is not Well Formed, it attempts to build
14434 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014435 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014436 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014437 */
14438
14439xmlDocPtr
14440xmlRecoverFile(const char *filename) {
14441 return(xmlSAXParseFile(NULL, filename, 1));
14442}
14443
14444
14445/**
14446 * xmlSetupParserForBuffer:
14447 * @ctxt: an XML parser context
14448 * @buffer: a xmlChar * buffer
14449 * @filename: a file name
14450 *
14451 * Setup the parser context to parse a new buffer; Clears any prior
14452 * contents from the parser context. The buffer parameter must not be
14453 * NULL, but the filename parameter can be
14454 */
14455void
14456xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14457 const char* filename)
14458{
14459 xmlParserInputPtr input;
14460
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014461 if ((ctxt == NULL) || (buffer == NULL))
14462 return;
14463
Owen Taylor3473f882001-02-23 17:55:21 +000014464 input = xmlNewInputStream(ctxt);
14465 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014466 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014467 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014468 return;
14469 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014470
Owen Taylor3473f882001-02-23 17:55:21 +000014471 xmlClearParserCtxt(ctxt);
14472 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014473 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014474 input->base = buffer;
14475 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014476 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014477 inputPush(ctxt, input);
14478}
14479
14480/**
14481 * xmlSAXUserParseFile:
14482 * @sax: a SAX handler
14483 * @user_data: The user data returned on SAX callbacks
14484 * @filename: a file name
14485 *
14486 * parse an XML file and call the given SAX handler routines.
14487 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014488 *
Owen Taylor3473f882001-02-23 17:55:21 +000014489 * Returns 0 in case of success or a error number otherwise
14490 */
14491int
14492xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14493 const char *filename) {
14494 int ret = 0;
14495 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014496
Owen Taylor3473f882001-02-23 17:55:21 +000014497 ctxt = xmlCreateFileParserCtxt(filename);
14498 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014499 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014500 xmlFree(ctxt->sax);
14501 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014502 xmlDetectSAX2(ctxt);
14503
Owen Taylor3473f882001-02-23 17:55:21 +000014504 if (user_data != NULL)
14505 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014506
Owen Taylor3473f882001-02-23 17:55:21 +000014507 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014508
Owen Taylor3473f882001-02-23 17:55:21 +000014509 if (ctxt->wellFormed)
14510 ret = 0;
14511 else {
14512 if (ctxt->errNo != 0)
14513 ret = ctxt->errNo;
14514 else
14515 ret = -1;
14516 }
14517 if (sax != NULL)
14518 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014519 if (ctxt->myDoc != NULL) {
14520 xmlFreeDoc(ctxt->myDoc);
14521 ctxt->myDoc = NULL;
14522 }
Owen Taylor3473f882001-02-23 17:55:21 +000014523 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014524
Owen Taylor3473f882001-02-23 17:55:21 +000014525 return ret;
14526}
Daniel Veillard81273902003-09-30 00:43:48 +000014527#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014528
14529/************************************************************************
14530 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014531 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014532 * *
14533 ************************************************************************/
14534
14535/**
14536 * xmlCreateMemoryParserCtxt:
14537 * @buffer: a pointer to a char array
14538 * @size: the size of the array
14539 *
14540 * Create a parser context for an XML in-memory document.
14541 *
14542 * Returns the new parser context or NULL
14543 */
14544xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014545xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014546 xmlParserCtxtPtr ctxt;
14547 xmlParserInputPtr input;
14548 xmlParserInputBufferPtr buf;
14549
14550 if (buffer == NULL)
14551 return(NULL);
14552 if (size <= 0)
14553 return(NULL);
14554
14555 ctxt = xmlNewParserCtxt();
14556 if (ctxt == NULL)
14557 return(NULL);
14558
Daniel Veillard53350552003-09-18 13:35:51 +000014559 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014560 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014561 if (buf == NULL) {
14562 xmlFreeParserCtxt(ctxt);
14563 return(NULL);
14564 }
Owen Taylor3473f882001-02-23 17:55:21 +000014565
14566 input = xmlNewInputStream(ctxt);
14567 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014568 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014569 xmlFreeParserCtxt(ctxt);
14570 return(NULL);
14571 }
14572
14573 input->filename = NULL;
14574 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014575 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014576
14577 inputPush(ctxt, input);
14578 return(ctxt);
14579}
14580
Daniel Veillard81273902003-09-30 00:43:48 +000014581#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014582/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014583 * xmlSAXParseMemoryWithData:
14584 * @sax: the SAX handler block
14585 * @buffer: an pointer to a char array
14586 * @size: the size of the array
14587 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14588 * documents
14589 * @data: the userdata
14590 *
14591 * parse an XML in-memory block and use the given SAX function block
14592 * to handle the parsing callback. If sax is NULL, fallback to the default
14593 * DOM tree building routines.
14594 *
14595 * User data (void *) is stored within the parser context in the
14596 * context's _private member, so it is available nearly everywhere in libxml
14597 *
14598 * Returns the resulting document tree
14599 */
14600
14601xmlDocPtr
14602xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14603 int size, int recovery, void *data) {
14604 xmlDocPtr ret;
14605 xmlParserCtxtPtr ctxt;
14606
Daniel Veillardab2a7632009-07-09 08:45:03 +020014607 xmlInitParser();
14608
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014609 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14610 if (ctxt == NULL) return(NULL);
14611 if (sax != NULL) {
14612 if (ctxt->sax != NULL)
14613 xmlFree(ctxt->sax);
14614 ctxt->sax = sax;
14615 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014616 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014617 if (data!=NULL) {
14618 ctxt->_private=data;
14619 }
14620
Daniel Veillardadba5f12003-04-04 16:09:01 +000014621 ctxt->recovery = recovery;
14622
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014623 xmlParseDocument(ctxt);
14624
14625 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14626 else {
14627 ret = NULL;
14628 xmlFreeDoc(ctxt->myDoc);
14629 ctxt->myDoc = NULL;
14630 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014631 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014632 ctxt->sax = NULL;
14633 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014634
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014635 return(ret);
14636}
14637
14638/**
Owen Taylor3473f882001-02-23 17:55:21 +000014639 * xmlSAXParseMemory:
14640 * @sax: the SAX handler block
14641 * @buffer: an pointer to a char array
14642 * @size: the size of the array
14643 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14644 * documents
14645 *
14646 * parse an XML in-memory block and use the given SAX function block
14647 * to handle the parsing callback. If sax is NULL, fallback to the default
14648 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014649 *
Owen Taylor3473f882001-02-23 17:55:21 +000014650 * Returns the resulting document tree
14651 */
14652xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014653xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14654 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014655 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014656}
14657
14658/**
14659 * xmlParseMemory:
14660 * @buffer: an pointer to a char array
14661 * @size: the size of the array
14662 *
14663 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014664 *
Owen Taylor3473f882001-02-23 17:55:21 +000014665 * Returns the resulting document tree
14666 */
14667
Daniel Veillard50822cb2001-07-26 20:05:51 +000014668xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014669 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14670}
14671
14672/**
14673 * xmlRecoverMemory:
14674 * @buffer: an pointer to a char array
14675 * @size: the size of the array
14676 *
14677 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014678 * In the case the document is not Well Formed, an attempt to
14679 * build a tree is tried anyway
14680 *
14681 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014682 */
14683
Daniel Veillard50822cb2001-07-26 20:05:51 +000014684xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014685 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14686}
14687
14688/**
14689 * xmlSAXUserParseMemory:
14690 * @sax: a SAX handler
14691 * @user_data: The user data returned on SAX callbacks
14692 * @buffer: an in-memory XML document input
14693 * @size: the length of the XML document in bytes
14694 *
14695 * A better SAX parsing routine.
14696 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014697 *
Owen Taylor3473f882001-02-23 17:55:21 +000014698 * Returns 0 in case of success or a error number otherwise
14699 */
14700int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014701 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014702 int ret = 0;
14703 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014704
14705 xmlInitParser();
14706
Owen Taylor3473f882001-02-23 17:55:21 +000014707 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14708 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014709 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14710 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014711 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014712 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014713
Daniel Veillard30211a02001-04-26 09:33:18 +000014714 if (user_data != NULL)
14715 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014716
Owen Taylor3473f882001-02-23 17:55:21 +000014717 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014718
Owen Taylor3473f882001-02-23 17:55:21 +000014719 if (ctxt->wellFormed)
14720 ret = 0;
14721 else {
14722 if (ctxt->errNo != 0)
14723 ret = ctxt->errNo;
14724 else
14725 ret = -1;
14726 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014727 if (sax != NULL)
14728 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014729 if (ctxt->myDoc != NULL) {
14730 xmlFreeDoc(ctxt->myDoc);
14731 ctxt->myDoc = NULL;
14732 }
Owen Taylor3473f882001-02-23 17:55:21 +000014733 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014734
Owen Taylor3473f882001-02-23 17:55:21 +000014735 return ret;
14736}
Daniel Veillard81273902003-09-30 00:43:48 +000014737#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014738
14739/**
14740 * xmlCreateDocParserCtxt:
14741 * @cur: a pointer to an array of xmlChar
14742 *
14743 * Creates a parser context for an XML in-memory document.
14744 *
14745 * Returns the new parser context or NULL
14746 */
14747xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014748xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014749 int len;
14750
14751 if (cur == NULL)
14752 return(NULL);
14753 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014754 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014755}
14756
Daniel Veillard81273902003-09-30 00:43:48 +000014757#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014758/**
14759 * xmlSAXParseDoc:
14760 * @sax: the SAX handler block
14761 * @cur: a pointer to an array of xmlChar
14762 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14763 * documents
14764 *
14765 * parse an XML in-memory document and build a tree.
14766 * It use the given SAX function block to handle the parsing callback.
14767 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014768 *
Owen Taylor3473f882001-02-23 17:55:21 +000014769 * Returns the resulting document tree
14770 */
14771
14772xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014773xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014774 xmlDocPtr ret;
14775 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014776 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014777
Daniel Veillard38936062004-11-04 17:45:11 +000014778 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014779
14780
14781 ctxt = xmlCreateDocParserCtxt(cur);
14782 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014783 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014784 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014785 ctxt->sax = sax;
14786 ctxt->userData = NULL;
14787 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014788 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014789
14790 xmlParseDocument(ctxt);
14791 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14792 else {
14793 ret = NULL;
14794 xmlFreeDoc(ctxt->myDoc);
14795 ctxt->myDoc = NULL;
14796 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014797 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014798 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014799 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014800
Owen Taylor3473f882001-02-23 17:55:21 +000014801 return(ret);
14802}
14803
14804/**
14805 * xmlParseDoc:
14806 * @cur: a pointer to an array of xmlChar
14807 *
14808 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014809 *
Owen Taylor3473f882001-02-23 17:55:21 +000014810 * Returns the resulting document tree
14811 */
14812
14813xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014814xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014815 return(xmlSAXParseDoc(NULL, cur, 0));
14816}
Daniel Veillard81273902003-09-30 00:43:48 +000014817#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014818
Daniel Veillard81273902003-09-30 00:43:48 +000014819#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014820/************************************************************************
14821 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014822 * Specific function to keep track of entities references *
14823 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014824 * *
14825 ************************************************************************/
14826
14827static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14828
14829/**
14830 * xmlAddEntityReference:
14831 * @ent : A valid entity
14832 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014833 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014834 *
14835 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14836 */
14837static void
14838xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14839 xmlNodePtr lastNode)
14840{
14841 if (xmlEntityRefFunc != NULL) {
14842 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14843 }
14844}
14845
14846
14847/**
14848 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014849 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014850 *
14851 * Set the function to call call back when a xml reference has been made
14852 */
14853void
14854xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14855{
14856 xmlEntityRefFunc = func;
14857}
Daniel Veillard81273902003-09-30 00:43:48 +000014858#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014859
14860/************************************************************************
14861 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014862 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014863 * *
14864 ************************************************************************/
14865
14866#ifdef LIBXML_XPATH_ENABLED
14867#include <libxml/xpath.h>
14868#endif
14869
Daniel Veillardffa3c742005-07-21 13:24:09 +000014870extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014871static int xmlParserInitialized = 0;
14872
14873/**
14874 * xmlInitParser:
14875 *
14876 * Initialization function for the XML parser.
14877 * This is not reentrant. Call once before processing in case of
14878 * use in multithreaded programs.
14879 */
14880
14881void
14882xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014883 if (xmlParserInitialized != 0)
14884 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014885
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014886#ifdef LIBXML_THREAD_ENABLED
14887 __xmlGlobalInitMutexLock();
14888 if (xmlParserInitialized == 0) {
14889#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014890 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014891 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014892 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14893 (xmlGenericError == NULL))
14894 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014895 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014896 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014897 xmlInitCharEncodingHandlers();
14898 xmlDefaultSAXHandlerInit();
14899 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014900#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014901 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014902#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014903#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014904 htmlInitAutoClose();
14905 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014906#endif
14907#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014908 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014909#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014910 xmlParserInitialized = 1;
14911#ifdef LIBXML_THREAD_ENABLED
14912 }
14913 __xmlGlobalInitMutexUnlock();
14914#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014915}
14916
14917/**
14918 * xmlCleanupParser:
14919 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014920 * This function name is somewhat misleading. It does not clean up
14921 * parser state, it cleans up memory allocated by the library itself.
14922 * It is a cleanup function for the XML library. It tries to reclaim all
14923 * related global memory allocated for the library processing.
14924 * It doesn't deallocate any document related memory. One should
14925 * call xmlCleanupParser() only when the process has finished using
14926 * the library and all XML/HTML documents built with it.
14927 * See also xmlInitParser() which has the opposite function of preparing
14928 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014929 *
14930 * WARNING: if your application is multithreaded or has plugin support
14931 * calling this may crash the application if another thread or
14932 * a plugin is still using libxml2. It's sometimes very hard to
14933 * guess if libxml2 is in use in the application, some libraries
14934 * or plugins may use it without notice. In case of doubt abstain
14935 * from calling this function or do it just before calling exit()
14936 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014937 */
14938
14939void
14940xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014941 if (!xmlParserInitialized)
14942 return;
14943
Owen Taylor3473f882001-02-23 17:55:21 +000014944 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014945#ifdef LIBXML_CATALOG_ENABLED
14946 xmlCatalogCleanup();
14947#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014948 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014949 xmlCleanupInputCallbacks();
14950#ifdef LIBXML_OUTPUT_ENABLED
14951 xmlCleanupOutputCallbacks();
14952#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014953#ifdef LIBXML_SCHEMAS_ENABLED
14954 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014955 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014956#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014957 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014958 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014959 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014960 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014961 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014962}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014963
14964/************************************************************************
14965 * *
14966 * New set (2.6.0) of simpler and more flexible APIs *
14967 * *
14968 ************************************************************************/
14969
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; when it is NULL the string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement (safe inside unbraced if/else); the caller supplies
 * the terminating semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14981
14982/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014983 * xmlCtxtReset:
14984 * @ctxt: an XML parser context
14985 *
14986 * Reset a parser context
14987 */
14988void
14989xmlCtxtReset(xmlParserCtxtPtr ctxt)
14990{
14991 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014992 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014993
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014994 if (ctxt == NULL)
14995 return;
14996
14997 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014998
14999 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
15000 xmlFreeInputStream(input);
15001 }
15002 ctxt->inputNr = 0;
15003 ctxt->input = NULL;
15004
15005 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000015006 if (ctxt->spaceTab != NULL) {
15007 ctxt->spaceTab[0] = -1;
15008 ctxt->space = &ctxt->spaceTab[0];
15009 } else {
15010 ctxt->space = NULL;
15011 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015012
15013
15014 ctxt->nodeNr = 0;
15015 ctxt->node = NULL;
15016
15017 ctxt->nameNr = 0;
15018 ctxt->name = NULL;
15019
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015020 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015021 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015022 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015023 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015024 DICT_FREE(ctxt->directory);
15025 ctxt->directory = NULL;
15026 DICT_FREE(ctxt->extSubURI);
15027 ctxt->extSubURI = NULL;
15028 DICT_FREE(ctxt->extSubSystem);
15029 ctxt->extSubSystem = NULL;
15030 if (ctxt->myDoc != NULL)
15031 xmlFreeDoc(ctxt->myDoc);
15032 ctxt->myDoc = NULL;
15033
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015034 ctxt->standalone = -1;
15035 ctxt->hasExternalSubset = 0;
15036 ctxt->hasPErefs = 0;
15037 ctxt->html = 0;
15038 ctxt->external = 0;
15039 ctxt->instate = XML_PARSER_START;
15040 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015041
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015042 ctxt->wellFormed = 1;
15043 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000015044 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015045 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015046#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015047 ctxt->vctxt.userData = ctxt;
15048 ctxt->vctxt.error = xmlParserValidityError;
15049 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000015050#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015051 ctxt->record_info = 0;
15052 ctxt->nbChars = 0;
15053 ctxt->checkIndex = 0;
15054 ctxt->inSubset = 0;
15055 ctxt->errNo = XML_ERR_OK;
15056 ctxt->depth = 0;
15057 ctxt->charset = XML_CHAR_ENCODING_UTF8;
15058 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000015059 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000015060 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080015061 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015062 xmlInitNodeInfoSeq(&ctxt->node_seq);
15063
15064 if (ctxt->attsDefault != NULL) {
15065 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
15066 ctxt->attsDefault = NULL;
15067 }
15068 if (ctxt->attsSpecial != NULL) {
15069 xmlHashFree(ctxt->attsSpecial, NULL);
15070 ctxt->attsSpecial = NULL;
15071 }
15072
Daniel Veillard4432df22003-09-28 18:58:27 +000015073#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015074 if (ctxt->catalogs != NULL)
15075 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000015076#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000015077 if (ctxt->lastError.code != XML_ERR_OK)
15078 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015079}
15080
15081/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015082 * xmlCtxtResetPush:
15083 * @ctxt: an XML parser context
15084 * @chunk: a pointer to an array of chars
15085 * @size: number of chars in the array
15086 * @filename: an optional file name or URI
15087 * @encoding: the document encoding, or NULL
15088 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015089 * Reset a push parser context
15090 *
15091 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015092 */
15093int
15094xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
15095 int size, const char *filename, const char *encoding)
15096{
15097 xmlParserInputPtr inputStream;
15098 xmlParserInputBufferPtr buf;
15099 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
15100
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000015101 if (ctxt == NULL)
15102 return(1);
15103
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015104 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
15105 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
15106
15107 buf = xmlAllocParserInputBuffer(enc);
15108 if (buf == NULL)
15109 return(1);
15110
15111 if (ctxt == NULL) {
15112 xmlFreeParserInputBuffer(buf);
15113 return(1);
15114 }
15115
15116 xmlCtxtReset(ctxt);
15117
15118 if (ctxt->pushTab == NULL) {
15119 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15120 sizeof(xmlChar *));
15121 if (ctxt->pushTab == NULL) {
15122 xmlErrMemory(ctxt, NULL);
15123 xmlFreeParserInputBuffer(buf);
15124 return(1);
15125 }
15126 }
15127
15128 if (filename == NULL) {
15129 ctxt->directory = NULL;
15130 } else {
15131 ctxt->directory = xmlParserGetDirectory(filename);
15132 }
15133
15134 inputStream = xmlNewInputStream(ctxt);
15135 if (inputStream == NULL) {
15136 xmlFreeParserInputBuffer(buf);
15137 return(1);
15138 }
15139
15140 if (filename == NULL)
15141 inputStream->filename = NULL;
15142 else
15143 inputStream->filename = (char *)
15144 xmlCanonicPath((const xmlChar *) filename);
15145 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080015146 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015147
15148 inputPush(ctxt, inputStream);
15149
15150 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15151 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015152 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15153 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015154
15155 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15156
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015157 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015158#ifdef DEBUG_PUSH
15159 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15160#endif
15161 }
15162
15163 if (encoding != NULL) {
15164 xmlCharEncodingHandlerPtr hdlr;
15165
Daniel Veillard37334572008-07-31 08:20:02 +000015166 if (ctxt->encoding != NULL)
15167 xmlFree((xmlChar *) ctxt->encoding);
15168 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15169
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015170 hdlr = xmlFindCharEncodingHandler(encoding);
15171 if (hdlr != NULL) {
15172 xmlSwitchToEncoding(ctxt, hdlr);
15173 } else {
15174 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15175 "Unsupported encoding %s\n", BAD_CAST encoding);
15176 }
15177 } else if (enc != XML_CHAR_ENCODING_NONE) {
15178 xmlSwitchEncoding(ctxt, enc);
15179 }
15180
15181 return(0);
15182}
15183
Daniel Veillard37334572008-07-31 08:20:02 +000015184
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015185/**
Daniel Veillard37334572008-07-31 08:20:02 +000015186 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015187 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015188 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015189 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015190 *
15191 * Applies the options to the parser context
15192 *
15193 * Returns 0 in case of success, the set of unknown or unimplemented options
15194 * in case of error.
15195 */
Daniel Veillard37334572008-07-31 08:20:02 +000015196static int
15197xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015198{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015199 if (ctxt == NULL)
15200 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015201 if (encoding != NULL) {
15202 if (ctxt->encoding != NULL)
15203 xmlFree((xmlChar *) ctxt->encoding);
15204 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15205 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015206 if (options & XML_PARSE_RECOVER) {
15207 ctxt->recovery = 1;
15208 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015209 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015210 } else
15211 ctxt->recovery = 0;
15212 if (options & XML_PARSE_DTDLOAD) {
15213 ctxt->loadsubset = XML_DETECT_IDS;
15214 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015215 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015216 } else
15217 ctxt->loadsubset = 0;
15218 if (options & XML_PARSE_DTDATTR) {
15219 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15220 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015221 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015222 }
15223 if (options & XML_PARSE_NOENT) {
15224 ctxt->replaceEntities = 1;
15225 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15226 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015227 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015228 } else
15229 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015230 if (options & XML_PARSE_PEDANTIC) {
15231 ctxt->pedantic = 1;
15232 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015233 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015234 } else
15235 ctxt->pedantic = 0;
15236 if (options & XML_PARSE_NOBLANKS) {
15237 ctxt->keepBlanks = 0;
15238 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15239 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015240 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015241 } else
15242 ctxt->keepBlanks = 1;
15243 if (options & XML_PARSE_DTDVALID) {
15244 ctxt->validate = 1;
15245 if (options & XML_PARSE_NOWARNING)
15246 ctxt->vctxt.warning = NULL;
15247 if (options & XML_PARSE_NOERROR)
15248 ctxt->vctxt.error = NULL;
15249 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015250 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015251 } else
15252 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015253 if (options & XML_PARSE_NOWARNING) {
15254 ctxt->sax->warning = NULL;
15255 options -= XML_PARSE_NOWARNING;
15256 }
15257 if (options & XML_PARSE_NOERROR) {
15258 ctxt->sax->error = NULL;
15259 ctxt->sax->fatalError = NULL;
15260 options -= XML_PARSE_NOERROR;
15261 }
Daniel Veillard81273902003-09-30 00:43:48 +000015262#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015263 if (options & XML_PARSE_SAX1) {
15264 ctxt->sax->startElement = xmlSAX2StartElement;
15265 ctxt->sax->endElement = xmlSAX2EndElement;
15266 ctxt->sax->startElementNs = NULL;
15267 ctxt->sax->endElementNs = NULL;
15268 ctxt->sax->initialized = 1;
15269 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015270 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015271 }
Daniel Veillard81273902003-09-30 00:43:48 +000015272#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015273 if (options & XML_PARSE_NODICT) {
15274 ctxt->dictNames = 0;
15275 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015276 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015277 } else {
15278 ctxt->dictNames = 1;
15279 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015280 if (options & XML_PARSE_NOCDATA) {
15281 ctxt->sax->cdataBlock = NULL;
15282 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015283 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015284 }
15285 if (options & XML_PARSE_NSCLEAN) {
15286 ctxt->options |= XML_PARSE_NSCLEAN;
15287 options -= XML_PARSE_NSCLEAN;
15288 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015289 if (options & XML_PARSE_NONET) {
15290 ctxt->options |= XML_PARSE_NONET;
15291 options -= XML_PARSE_NONET;
15292 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015293 if (options & XML_PARSE_COMPACT) {
15294 ctxt->options |= XML_PARSE_COMPACT;
15295 options -= XML_PARSE_COMPACT;
15296 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015297 if (options & XML_PARSE_OLD10) {
15298 ctxt->options |= XML_PARSE_OLD10;
15299 options -= XML_PARSE_OLD10;
15300 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015301 if (options & XML_PARSE_NOBASEFIX) {
15302 ctxt->options |= XML_PARSE_NOBASEFIX;
15303 options -= XML_PARSE_NOBASEFIX;
15304 }
15305 if (options & XML_PARSE_HUGE) {
15306 ctxt->options |= XML_PARSE_HUGE;
15307 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015308 if (ctxt->dict != NULL)
15309 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015310 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015311 if (options & XML_PARSE_OLDSAX) {
15312 ctxt->options |= XML_PARSE_OLDSAX;
15313 options -= XML_PARSE_OLDSAX;
15314 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015315 if (options & XML_PARSE_IGNORE_ENC) {
15316 ctxt->options |= XML_PARSE_IGNORE_ENC;
15317 options -= XML_PARSE_IGNORE_ENC;
15318 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015319 if (options & XML_PARSE_BIG_LINES) {
15320 ctxt->options |= XML_PARSE_BIG_LINES;
15321 options -= XML_PARSE_BIG_LINES;
15322 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015323 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015324 return (options);
15325}
15326
15327/**
Daniel Veillard37334572008-07-31 08:20:02 +000015328 * xmlCtxtUseOptions:
15329 * @ctxt: an XML parser context
15330 * @options: a combination of xmlParserOption
15331 *
15332 * Applies the options to the parser context
15333 *
15334 * Returns 0 in case of success, the set of unknown or unimplemented options
15335 * in case of error.
15336 */
15337int
15338xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15339{
15340 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15341}
15342
15343/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015344 * xmlDoRead:
15345 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015346 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015347 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015348 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015349 * @reuse: keep the context for reuse
15350 *
15351 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015352 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015353 * Returns the resulting document tree or NULL
15354 */
15355static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015356xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15357 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015358{
15359 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015360
15361 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015362 if (encoding != NULL) {
15363 xmlCharEncodingHandlerPtr hdlr;
15364
15365 hdlr = xmlFindCharEncodingHandler(encoding);
15366 if (hdlr != NULL)
15367 xmlSwitchToEncoding(ctxt, hdlr);
15368 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015369 if ((URL != NULL) && (ctxt->input != NULL) &&
15370 (ctxt->input->filename == NULL))
15371 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015372 xmlParseDocument(ctxt);
15373 if ((ctxt->wellFormed) || ctxt->recovery)
15374 ret = ctxt->myDoc;
15375 else {
15376 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015377 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015378 xmlFreeDoc(ctxt->myDoc);
15379 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015380 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015381 ctxt->myDoc = NULL;
15382 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015383 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015384 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015385
15386 return (ret);
15387}
15388
15389/**
15390 * xmlReadDoc:
15391 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015392 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015393 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015394 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015395 *
15396 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015397 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015398 * Returns the resulting document tree
15399 */
15400xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015401xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015402{
15403 xmlParserCtxtPtr ctxt;
15404
15405 if (cur == NULL)
15406 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015407 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408
15409 ctxt = xmlCreateDocParserCtxt(cur);
15410 if (ctxt == NULL)
15411 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015412 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015413}
15414
15415/**
15416 * xmlReadFile:
15417 * @filename: a file or URL
15418 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015419 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420 *
15421 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015422 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015423 * Returns the resulting document tree
15424 */
15425xmlDocPtr
15426xmlReadFile(const char *filename, const char *encoding, int options)
15427{
15428 xmlParserCtxtPtr ctxt;
15429
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015430 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015431 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015432 if (ctxt == NULL)
15433 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015434 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015435}
15436
15437/**
15438 * xmlReadMemory:
15439 * @buffer: a pointer to a char array
15440 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015441 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015442 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015443 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015444 *
15445 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015446 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015447 * Returns the resulting document tree
15448 */
15449xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015450xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015451{
15452 xmlParserCtxtPtr ctxt;
15453
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015454 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015455 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15456 if (ctxt == NULL)
15457 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015458 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015459}
15460
15461/**
15462 * xmlReadFd:
15463 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015464 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015465 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015466 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015467 *
15468 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015469 * NOTE that the file descriptor will not be closed when the
15470 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015471 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015472 * Returns the resulting document tree
15473 */
15474xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015475xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015476{
15477 xmlParserCtxtPtr ctxt;
15478 xmlParserInputBufferPtr input;
15479 xmlParserInputPtr stream;
15480
15481 if (fd < 0)
15482 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015483 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015484
15485 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15486 if (input == NULL)
15487 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015488 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015489 ctxt = xmlNewParserCtxt();
15490 if (ctxt == NULL) {
15491 xmlFreeParserInputBuffer(input);
15492 return (NULL);
15493 }
15494 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15495 if (stream == NULL) {
15496 xmlFreeParserInputBuffer(input);
15497 xmlFreeParserCtxt(ctxt);
15498 return (NULL);
15499 }
15500 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015501 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015502}
15503
15504/**
15505 * xmlReadIO:
15506 * @ioread: an I/O read function
15507 * @ioclose: an I/O close function
15508 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015509 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015510 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015511 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015512 *
15513 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015514 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015515 * Returns the resulting document tree
15516 */
15517xmlDocPtr
15518xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015519 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015520{
15521 xmlParserCtxtPtr ctxt;
15522 xmlParserInputBufferPtr input;
15523 xmlParserInputPtr stream;
15524
15525 if (ioread == NULL)
15526 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015527 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015528
15529 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15530 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015531 if (input == NULL) {
15532 if (ioclose != NULL)
15533 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015534 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015535 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015536 ctxt = xmlNewParserCtxt();
15537 if (ctxt == NULL) {
15538 xmlFreeParserInputBuffer(input);
15539 return (NULL);
15540 }
15541 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15542 if (stream == NULL) {
15543 xmlFreeParserInputBuffer(input);
15544 xmlFreeParserCtxt(ctxt);
15545 return (NULL);
15546 }
15547 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015548 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015549}
15550
15551/**
15552 * xmlCtxtReadDoc:
15553 * @ctxt: an XML parser context
15554 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015555 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015556 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015557 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015558 *
15559 * parse an XML in-memory document and build a tree.
15560 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015561 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015562 * Returns the resulting document tree
15563 */
15564xmlDocPtr
15565xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015566 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015567{
15568 xmlParserInputPtr stream;
15569
15570 if (cur == NULL)
15571 return (NULL);
15572 if (ctxt == NULL)
15573 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015574 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015575
15576 xmlCtxtReset(ctxt);
15577
15578 stream = xmlNewStringInputStream(ctxt, cur);
15579 if (stream == NULL) {
15580 return (NULL);
15581 }
15582 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015583 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015584}
15585
15586/**
15587 * xmlCtxtReadFile:
15588 * @ctxt: an XML parser context
15589 * @filename: a file or URL
15590 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015591 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015592 *
15593 * parse an XML file from the filesystem or the network.
15594 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015595 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015596 * Returns the resulting document tree
15597 */
15598xmlDocPtr
15599xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15600 const char *encoding, int options)
15601{
15602 xmlParserInputPtr stream;
15603
15604 if (filename == NULL)
15605 return (NULL);
15606 if (ctxt == NULL)
15607 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015608 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015609
15610 xmlCtxtReset(ctxt);
15611
Daniel Veillard29614c72004-11-26 10:47:26 +000015612 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015613 if (stream == NULL) {
15614 return (NULL);
15615 }
15616 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015617 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015618}
15619
15620/**
15621 * xmlCtxtReadMemory:
15622 * @ctxt: an XML parser context
15623 * @buffer: a pointer to a char array
15624 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015625 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015626 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015627 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015628 *
15629 * parse an XML in-memory document and build a tree.
15630 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015631 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015632 * Returns the resulting document tree
15633 */
15634xmlDocPtr
15635xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015636 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015637{
15638 xmlParserInputBufferPtr input;
15639 xmlParserInputPtr stream;
15640
15641 if (ctxt == NULL)
15642 return (NULL);
15643 if (buffer == NULL)
15644 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015645 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015646
15647 xmlCtxtReset(ctxt);
15648
15649 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15650 if (input == NULL) {
15651 return(NULL);
15652 }
15653
15654 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15655 if (stream == NULL) {
15656 xmlFreeParserInputBuffer(input);
15657 return(NULL);
15658 }
15659
15660 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015661 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015662}
15663
15664/**
15665 * xmlCtxtReadFd:
15666 * @ctxt: an XML parser context
15667 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015668 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015669 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015670 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015671 *
15672 * parse an XML from a file descriptor and build a tree.
15673 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015674 * NOTE that the file descriptor will not be closed when the
15675 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015676 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015677 * Returns the resulting document tree
15678 */
15679xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015680xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15681 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015682{
15683 xmlParserInputBufferPtr input;
15684 xmlParserInputPtr stream;
15685
15686 if (fd < 0)
15687 return (NULL);
15688 if (ctxt == NULL)
15689 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015690 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015691
15692 xmlCtxtReset(ctxt);
15693
15694
15695 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15696 if (input == NULL)
15697 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015698 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015699 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15700 if (stream == NULL) {
15701 xmlFreeParserInputBuffer(input);
15702 return (NULL);
15703 }
15704 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015705 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015706}
15707
15708/**
15709 * xmlCtxtReadIO:
15710 * @ctxt: an XML parser context
15711 * @ioread: an I/O read function
15712 * @ioclose: an I/O close function
15713 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015714 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015715 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015716 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015717 *
15718 * parse an XML document from I/O functions and source and build a tree.
15719 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015720 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015721 * Returns the resulting document tree
15722 */
15723xmlDocPtr
15724xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15725 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015726 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015727 const char *encoding, int options)
15728{
15729 xmlParserInputBufferPtr input;
15730 xmlParserInputPtr stream;
15731
15732 if (ioread == NULL)
15733 return (NULL);
15734 if (ctxt == NULL)
15735 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015736 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015737
15738 xmlCtxtReset(ctxt);
15739
15740 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15741 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015742 if (input == NULL) {
15743 if (ioclose != NULL)
15744 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015745 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015746 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015747 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15748 if (stream == NULL) {
15749 xmlFreeParserInputBuffer(input);
15750 return (NULL);
15751 }
15752 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015753 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015754}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015755
15756#define bottom_parser
15757#include "elfgcchack.h"