blob: 2304e5777cbc2b9a8548748d38bbfd950b7c86b6 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
103#define XML_PARSER_BIG_ENTITY 1000
104#define XML_PARSER_LOT_ENTITY 5000
105
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in byte of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
112#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800125 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
135 return(0);
136
137 /*
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
141 */
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
145 }
146 consumed += ctxt->sizeentities;
147
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
149 return(0);
150 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000151 /*
152 * Do the check based on the replacement size of the entity
153 */
154 if (size < XML_PARSER_BIG_ENTITY)
155 return(0);
156
157 /*
158 * A limit on the amount of text data reasonably used
159 */
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
163 }
164 consumed += ctxt->sizeentities;
165
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168 return (0);
169 } else if (ent != NULL) {
170 /*
171 * use the number of parsed entities in the replacement
172 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800173 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000174
175 /*
176 * The amount of data parsed counting entities size only once
177 */
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
181 }
182 consumed += ctxt->sizeentities;
183
184 /*
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
187 */
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189 return (0);
190 } else {
191 /*
192 * strange we got no data for checking just return
193 */
194 return (0);
195 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 return (1);
198}
199
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000200/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000201 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000203 * arbitrary depth limit for the XML documents that we allow to
204 * process. This is not a limitation of the parser but a safety
205 * boundary feature. It can be disabled with the XML_PARSE_HUGE
206 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000207 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000208unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard0fb18932003-09-07 09:14:37 +0000210
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000213#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000214#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000215#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
216
Daniel Veillard1f972e92012-08-15 10:16:37 +0800217/**
218 * XML_PARSER_CHUNK_SIZE
219 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
225 */
226#define XML_PARSER_CHUNK_SIZE 100
227
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The list is NULL terminated; both the strings and the table
 * itself are read-only.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
237
Daniel Veillarda07050d2003-10-19 14:46:32 +0000238
Owen Taylor3473f882001-02-23 17:55:21 +0000239/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200240static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000242
Daniel Veillard7d515752003-09-26 19:12:37 +0000243static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000244xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000246 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000247 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000248
Daniel Veillard37334572008-07-31 08:20:02 +0000249static int
250xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000252#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000253static void
254xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000256#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000257
Daniel Veillard7d515752003-09-26 19:12:37 +0000258static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000259xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000261
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000262static int
263xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264
Daniel Veillarde57ec792003-09-10 10:50:59 +0000265/************************************************************************
266 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800267 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 * *
269 ************************************************************************/
270
271/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
276 *
277 * Handle a redefinition of attribute error
278 */
279static void
280xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
282{
Daniel Veillard157fee02003-10-31 10:36:03 +0000283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
285 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000286 if (ctxt != NULL)
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000289 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000294 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000300 if (ctxt != NULL) {
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
304 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305}
306
307/**
308 * xmlFatalErr:
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @extra: extra information string
312 *
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314 */
315static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317{
318 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800319 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320
Daniel Veillard157fee02003-10-31 10:36:03 +0000321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
323 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 switch (error) {
325 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800326 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800329 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800332 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "internal error";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800338 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800341 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800344 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800350 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800353 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800356 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "Fragment not allowed";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
441 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000492 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800495 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 if (info == NULL)
507 snprintf(errstr, 128, "%s\n", errmsg);
508 else
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000510 if (ctxt != NULL)
511 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000514 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL) {
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
519 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520}
521
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000522/**
523 * xmlFatalErrMsg:
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
527 *
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529 */
530static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000533{
Daniel Veillard157fee02003-10-31 10:36:03 +0000534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
536 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000537 if (ctxt != NULL)
538 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000546}
547
548/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000549 * xmlWarningMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 * @str2: extra data
555 *
556 * Handle a warning.
557 */
558static void
559xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000563
Daniel Veillard157fee02003-10-31 10:36:03 +0000564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000569 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200570 if (ctxt != NULL) {
571 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000572 (ctxt->sax) ? ctxt->sax->warning : NULL,
573 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200578 } else {
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
584 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585}
586
587/**
588 * xmlValidityError:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
592 * @str1: extra data
593 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000594 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595 */
596static void
597xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000598 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000599{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000600 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000601
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->errNo = error;
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
609 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200610 if (ctxt != NULL) {
611 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000612 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200618 } else {
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000624 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625}
626
627/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000628 * xmlFatalErrMsgInt:
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
633 *
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635 */
636static void
637xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000638 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000639{
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if (ctxt != NULL)
644 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000645 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000648 if (ctxt != NULL) {
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
652 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000653}
654
655/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: an string info
661 * @val: an integer value
662 * @str2: an string info
663 *
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 */
666static void
667xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800668 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000669 const xmlChar *str2)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000680 if (ctxt != NULL) {
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
684 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000685}
686
687/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000688 * xmlFatalErrMsgStr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
693 *
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 */
696static void
697xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000698 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000699{
Daniel Veillard157fee02003-10-31 10:36:03 +0000700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
702 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
713 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000714}
715
716/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000717 * xmlErrMsgStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
722 *
723 * Handle a non fatal parser error
724 */
725static void
726xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
728{
Daniel Veillard157fee02003-10-31 10:36:03 +0000729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
731 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000732 if (ctxt != NULL)
733 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737 val);
738}
739
740/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000741 * xmlNsErr:
742 * @ctxt: an XML parser context
743 * @error: the error number
744 * @msg: the message
745 * @info1: extra information string
746 * @info2: extra information string
747 *
748 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749 */
750static void
751xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000755{
Daniel Veillard157fee02003-10-31 10:36:03 +0000756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
758 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000759 if (ctxt != NULL)
760 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000765 if (ctxt != NULL)
766 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000767}
768
Daniel Veillard37334572008-07-31 08:20:02 +0000769/**
770 * xmlNsWarn
771 * @ctxt: an XML parser context
772 * @error: the error number
773 * @msg: the message
774 * @info1: extra information string
775 * @info2: extra information string
776 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800777 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000778 */
779static void
780xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const char *msg,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
784{
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
787 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
792}
793
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000794/************************************************************************
795 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800796 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797 * *
798 ************************************************************************/
799
800/**
801 * xmlHasFeature:
802 * @feature: the feature to be examined
803 *
804 * Examines if the library has been compiled with a given feature.
805 *
806 * Returns a non-zero value if the feature exist, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an unknown
808 * unknown feature is requested, non-zero otherwise.
809 */
810int
811xmlHasFeature(xmlFeature feature)
812{
813 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_THREAD_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_TREE_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_OUTPUT_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_PUSH_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_READER_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_PATTERN_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_WRITER_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_SAX1_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_FTP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_HTTP_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_VALID_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_HTML_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_LEGACY_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_C14N_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_CATALOG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_XPATH_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_XPTR_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_XINCLUDE_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_ICONV_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_ISO8859X_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_UNICODE_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_REGEXP_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_AUTOMATA_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_EXPR_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_SCHEMAS_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_SCHEMATRON_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_MODULES_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_DEBUG_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef DEBUG_MEMORY_LOCATION
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_DEBUG_RUNTIME
990 return(1);
991#else
992 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000993#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000994 case XML_WITH_ZLIB:
995#ifdef LIBXML_ZLIB_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001000 case XML_WITH_LZMA:
1001#ifdef LIBXML_LZMA_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 case XML_WITH_ICU:
1007#ifdef LIBXML_ICU_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012 default:
1013 break;
1014 }
1015 return(0);
1016}
1017
1018/************************************************************************
1019 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001020 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 * *
1022 ************************************************************************/
1023
1024/**
1025 * xmlDetectSAX2:
1026 * @ctxt: an XML parser context
1027 *
1028 * Do the SAX2 detection and specific intialization
1029 */
1030static void
1031xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001037#else
1038 ctxt->sax2 = 1;
1039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001046 xmlErrMemory(ctxt, NULL);
1047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048}
1049
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050typedef struct _xmlDefAttrs xmlDefAttrs;
1051typedef xmlDefAttrs *xmlDefAttrsPtr;
1052struct _xmlDefAttrs {
1053 int nbAttrs; /* number of defaulted attributes on that element */
1054 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001055 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057
1058/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001059 * xmlAttrNormalizeSpace:
1060 * @src: the source string
1061 * @dst: the target string
1062 *
1063 * Normalize the space in non CDATA attribute values:
1064 * If the attribute type is not CDATA, then the XML processor MUST further
1065 * process the normalized attribute value by discarding any leading and
1066 * trailing space (#x20) characters, and by replacing sequences of space
1067 * (#x20) characters by a single space (#x20) character.
1068 * Note that the size of dst need to be at least src, and if one doesn't need
1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070 * passing src as dst is just fine.
1071 *
1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073 * is needed.
1074 */
1075static xmlChar *
1076xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077{
1078 if ((src == NULL) || (dst == NULL))
1079 return(NULL);
1080
1081 while (*src == 0x20) src++;
1082 while (*src != 0) {
1083 if (*src == 0x20) {
1084 while (*src == 0x20) src++;
1085 if (*src != 0)
1086 *dst++ = 0x20;
1087 } else {
1088 *dst++ = *src++;
1089 }
1090 }
1091 *dst = 0;
1092 if (dst == src)
1093 return(NULL);
1094 return(dst);
1095}
1096
1097/**
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1100 *
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1104 *
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106 * is needed.
1107 */
1108static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001109xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001110{
1111 int i;
1112 int remove_head = 0;
1113 int need_realloc = 0;
1114 const xmlChar *cur;
1115
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117 return(NULL);
1118 i = *len;
1119 if (i <= 0)
1120 return(NULL);
1121
1122 cur = src;
1123 while (*cur == 0x20) {
1124 cur++;
1125 remove_head++;
1126 }
1127 while (*cur != 0) {
1128 if (*cur == 0x20) {
1129 cur++;
1130 if ((*cur == 0x20) || (*cur == 0)) {
1131 need_realloc = 1;
1132 break;
1133 }
1134 } else
1135 cur++;
1136 }
1137 if (need_realloc) {
1138 xmlChar *ret;
1139
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141 if (ret == NULL) {
1142 xmlErrMemory(ctxt, NULL);
1143 return(NULL);
1144 }
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1147 return(ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 memmove(src, src + remove_head, 1 + *len);
1151 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001152 }
1153 return(NULL);
1154}
1155
1156/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 * xmlAddDefAttrs:
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1162 *
1163 * Add a defaulted attribute for an element
1164 */
1165static void
1166xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1171 int len;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1174
Daniel Veillard6a31b832008-03-26 14:06:44 +00001175 /*
1176 * Allows to detect attribute redefinitions
1177 */
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180 return;
1181 }
1182
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 if (ctxt->attsDefault == NULL)
1186 goto mem_error;
1187 }
1188
1189 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 */
1193 name = xmlSplitQName3(fullname, &len);
1194 if (name == NULL) {
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1196 prefix = NULL;
1197 } else {
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200 }
1201
1202 /*
1203 * make sure there is some storage
1204 */
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 if (defaults == NULL)
1210 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001212 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1215 xmlFree(defaults);
1216 goto mem_error;
1217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001219 xmlDefAttrsPtr temp;
1220
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001223 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1229 xmlFree(defaults);
1230 goto mem_error;
1231 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 }
1233
1234 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001235 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001236 * are within the DTD and hen not associated to namespace names.
1237 */
1238 name = xmlSplitQName3(fullattr, &len);
1239 if (name == NULL) {
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241 prefix = NULL;
1242 } else {
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245 }
1246
Daniel Veillardae0765b2008-07-31 19:54:59 +00001247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254 if (ctxt->external)
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256 else
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 defaults->nbAttrs++;
1259
1260 return;
1261
1262mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001263 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 return;
1265}
1266
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001267/**
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1273 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001275 */
1276static void
1277xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1280 int type)
1281{
1282 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001284 if (ctxt->attsSpecial == NULL)
1285 goto mem_error;
1286 }
1287
Daniel Veillardac4118d2008-01-11 05:27:32 +00001288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289 return;
1290
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001293 return;
1294
1295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001297 return;
1298}
1299
Daniel Veillard4432df22003-09-28 18:58:27 +00001300/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001301 * xmlCleanSpecialAttrCallback:
1302 *
1303 * Removes CDATA attributes from the special attribute table
1304 */
1305static void
1306xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313 }
1314}
1315
1316/**
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1319 *
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1323 */
1324static void
1325xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326{
1327 if (ctxt->attsSpecial == NULL)
1328 return;
1329
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1335 }
1336 return;
1337}
1338
1339/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1342 *
1343 * Checks that the value conforms to the LanguageID production:
1344 *
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1347 *
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1354 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001355 * The current REC reference the sucessors of RFC 1766, currently 5646
1356 *
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1359 * ["-" script]
1360 * ["-" region]
1361 * *("-" variant)
1362 * *("-" extension)
1363 * ["-" privateuse]
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1369 *
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1372 *
1373 * script = 4ALPHA ; ISO 15924 code
1374 *
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1377 *
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1380 *
1381 * extension = singleton 1*("-" (2*8alphanum))
1382 *
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1386 * / %x41-57 ; A - W
1387 * / %x59-5A ; Y - Z
1388 * / %x61-77 ; a - w
1389 * / %x79-7A ; y - z
1390 *
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1394 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001395 * Returns 1 if correct 0 otherwise
1396 **/
1397int
1398xmlCheckLanguageID(const xmlChar * lang)
1399{
Daniel Veillard60587d62010-11-04 15:16:27 +01001400 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001401
1402 if (cur == NULL)
1403 return (0);
1404 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001405 ((cur[0] == 'I') && (cur[1] == '-')) ||
1406 ((cur[0] == 'x') && (cur[1] == '-')) ||
1407 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001408 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001409 * Still allow IANA code and user code which were coming
1410 * from the previous version of the XML-1.0 specification
1411 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001412 */
1413 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001415 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001417 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001418 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001419 nxt = cur;
1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422 nxt++;
1423 if (nxt - cur >= 4) {
1424 /*
1425 * Reserved
1426 */
1427 if ((nxt - cur > 8) || (nxt[0] != 0))
1428 return(0);
1429 return(1);
1430 }
1431 if (nxt - cur < 2)
1432 return(0);
1433 /* we got an ISO 639 code */
1434 if (nxt[0] == 0)
1435 return(1);
1436 if (nxt[0] != '-')
1437 return(0);
1438
1439 nxt++;
1440 cur = nxt;
1441 /* now we can have extlang or script or region or variant */
1442 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443 goto region_m49;
1444
1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447 nxt++;
1448 if (nxt - cur == 4)
1449 goto script;
1450 if (nxt - cur == 2)
1451 goto region;
1452 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453 goto variant;
1454 if (nxt - cur != 3)
1455 return(0);
1456 /* we parsed an extlang */
1457 if (nxt[0] == 0)
1458 return(1);
1459 if (nxt[0] != '-')
1460 return(0);
1461
1462 nxt++;
1463 cur = nxt;
1464 /* now we can have script or region or variant */
1465 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466 goto region_m49;
1467
1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470 nxt++;
1471 if (nxt - cur == 2)
1472 goto region;
1473 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474 goto variant;
1475 if (nxt - cur != 4)
1476 return(0);
1477 /* we parsed a script */
1478script:
1479 if (nxt[0] == 0)
1480 return(1);
1481 if (nxt[0] != '-')
1482 return(0);
1483
1484 nxt++;
1485 cur = nxt;
1486 /* now we can have region or variant */
1487 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488 goto region_m49;
1489
1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492 nxt++;
1493
1494 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495 goto variant;
1496 if (nxt - cur != 2)
1497 return(0);
1498 /* we parsed a region */
1499region:
1500 if (nxt[0] == 0)
1501 return(1);
1502 if (nxt[0] != '-')
1503 return(0);
1504
1505 nxt++;
1506 cur = nxt;
1507 /* now we can just have a variant */
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511
1512 if ((nxt - cur < 5) || (nxt - cur > 8))
1513 return(0);
1514
1515 /* we parsed a variant */
1516variant:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001522 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001523
1524region_m49:
1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527 nxt += 3;
1528 goto region;
1529 }
1530 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001531}
1532
Owen Taylor3473f882001-02-23 17:55:21 +00001533/************************************************************************
1534 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001535 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001536 * *
1537 ************************************************************************/
1538
Daniel Veillard8ed10722009-08-20 19:17:36 +02001539static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001541
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542#ifdef SAX2
1543/**
1544 * nsPush:
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1548 *
1549 * Pushes a new parser namespace on top of the ns stack
1550 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 */
1554static int
1555nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1558 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001560 if (ctxt->nsTab[i] == prefix) {
1561 /* in scope */
1562 if (ctxt->nsTab[i + 1] == URL)
1563 return(-2);
1564 /* out of scope keep it */
1565 break;
1566 }
1567 }
1568 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570 ctxt->nsMax = 10;
1571 ctxt->nsNr = 0;
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001576 ctxt->nsMax = 0;
1577 return (-1);
1578 }
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001580 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001586 ctxt->nsMax /= 2;
1587 return (-1);
1588 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001589 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 }
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1594}
1595/**
1596 * nsPop:
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1599 *
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1601 *
1602 * Returns the number of namespaces removed
1603 */
1604static int
1605nsPop(xmlParserCtxtPtr ctxt, int nr)
1606{
1607 int i;
1608
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612 nr = ctxt->nsNr;
1613 }
1614 if (ctxt->nsNr <= 0)
1615 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001616
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 for (i = 0;i < nr;i++) {
1618 ctxt->nsNr--;
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1620 }
1621 return(nr);
1622}
1623#endif
1624
1625static int
1626xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001628 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629 int maxatts;
1630
1631 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001645 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001646 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001651 ctxt->maxatts = maxatts;
1652 }
1653 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657}
1658
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659/**
1660 * inputPush:
1661 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001662 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 *
1664 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001666 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001667 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001668int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001671 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001672 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 if (ctxt->inputNr >= ctxt->inputMax) {
1674 ctxt->inputMax *= 2;
1675 ctxt->inputTab =
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677 ctxt->inputMax *
1678 sizeof(ctxt->inputTab[0]));
1679 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
1683 value = NULL;
1684 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001685 }
1686 }
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1690}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001693 * @ctxt: an XML parser context
1694 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001696 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001699xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700inputPop(xmlParserCtxtPtr ctxt)
1701{
1702 xmlParserInputPtr ret;
1703
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001704 if (ctxt == NULL)
1705 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001707 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 ctxt->inputNr--;
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711 else
1712 ctxt->input = NULL;
1713 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001714 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 return (ret);
1716}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001717/**
1718 * nodePush:
1719 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001720 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 *
1722 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001723 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001724 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001729 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001731 xmlNodePtr *tmp;
1732
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001736 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001738 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001739 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001740 ctxt->nodeTab = tmp;
1741 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001742 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001747 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001750 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001751 ctxt->nodeTab[ctxt->nodeNr] = value;
1752 ctxt->node = value;
1753 return (ctxt->nodeNr++);
1754}
Daniel Veillard8915c152008-08-26 13:05:34 +00001755
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756/**
1757 * nodePop:
1758 * @ctxt: an XML parser context
1759 *
1760 * Pops the top element node from the node stack
1761 *
1762 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001763 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001764xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765nodePop(xmlParserCtxtPtr ctxt)
1766{
1767 xmlNodePtr ret;
1768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001771 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 ctxt->nodeNr--;
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775 else
1776 ctxt->node = NULL;
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 return (ret);
1780}
Daniel Veillarda2351322004-06-27 12:08:10 +00001781
1782#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 * nameNsPush:
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1789 *
1790 * Pushes a new element name/prefix/URL on top of the name stack
1791 *
1792 * Returns -1 in case of error, the index in the stack otherwise
1793 */
1794static int
1795nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797{
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1800 void **tmp2;
1801 ctxt->nameMax *= 2;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803 ctxt->nameMax *
1804 sizeof(ctxt->nameTab[0]));
1805 if (tmp == NULL) {
1806 ctxt->nameMax /= 2;
1807 goto mem_error;
1808 }
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811 ctxt->nameMax * 3 *
1812 sizeof(ctxt->pushTab[0]));
1813 if (tmp2 == NULL) {
1814 ctxt->nameMax /= 2;
1815 goto mem_error;
1816 }
1817 ctxt->pushTab = tmp2;
1818 }
1819 ctxt->nameTab[ctxt->nameNr] = value;
1820 ctxt->name = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 return (ctxt->nameNr++);
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 return (-1);
1828}
1829/**
1830 * nameNsPop:
1831 * @ctxt: an XML parser context
1832 *
1833 * Pops the top element/prefix/URI name from the name stack
1834 *
1835 * Returns the name just removed
1836 */
1837static const xmlChar *
1838nameNsPop(xmlParserCtxtPtr ctxt)
1839{
1840 const xmlChar *ret;
1841
1842 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001843 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844 ctxt->nameNr--;
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847 else
1848 ctxt->name = NULL;
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1851 return (ret);
1852}
Daniel Veillarda2351322004-06-27 12:08:10 +00001853#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854
1855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001856 * namePush:
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1859 *
1860 * Pushes a new element name on top of the name stack
1861 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001865namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001867 if (ctxt == NULL) return (-1);
1868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001872 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001873 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001878 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001879 }
1880 ctxt->nameTab[ctxt->nameNr] = value;
1881 ctxt->name = value;
1882 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001884 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886}
1887/**
1888 * namePop:
1889 * @ctxt: an XML parser context
1890 *
1891 * Pops the top element name from the name stack
1892 *
1893 * Returns the name just removed
1894 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001895const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896namePop(xmlParserCtxtPtr ctxt)
1897{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001898 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001899
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 ctxt->nameNr--;
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905 else
1906 ctxt->name = NULL;
1907 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001908 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 return (ret);
1910}
Owen Taylor3473f882001-02-23 17:55:21 +00001911
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001912static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001914 int *tmp;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001921 ctxt->spaceMax /=2;
1922 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001924 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1929}
1930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001931static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 int ret;
1933 if (ctxt->spaceNr <= 0) return(0);
1934 ctxt->spaceNr--;
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001938 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1941 return(ret);
1942}
1943
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1978
/*
 * Direct accessors on the current input; they all expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * exactly c1 ... cn. The bytes are compared in order and the comparison
 * stops at the first mismatch. Every argument is parenthesized so that
 * any expression can be passed, and s is read through a const pointer
 * since nothing is ever written.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2001
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character and column counters, then handle a parameter entity
 * reference starting at the new position and pop the input if it is
 * exhausted and cannot be grown. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one at a time so
 * that line and column stay correct when newlines are skipped. val is
 * parenthesized in the loop test so that any expression can be passed;
 * it is evaluated on each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
2024
Daniel Veillarda880b122003-04-21 21:36:41 +00002025#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002028 xmlSHRINK (ctxt);
2029
2030static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 }
Owen Taylor3473f882001-02-23 17:55:21 +00002036
Daniel Veillarda880b122003-04-21 21:36:41 +00002037#define GROW if ((ctxt->progressive == 0) && \
2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002039 xmlGROW (ctxt);
2040
2041static void xmlGROW (xmlParserCtxtPtr ctxt) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002042 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002044 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002045 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002047 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002048 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002049 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002050 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002051 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2052 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002053}
Owen Taylor3473f882001-02-23 17:55:21 +00002054
/* skip the blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: skip exactly one xmlChar, updating the column and character
 * counters, and try to grow the input when the new current character
 * is 0. The expansion is a brace-enclosed block, not a do/while.
 */
#define NEXT1 {								\
        ctxt->input->col++;						\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, l xmlChars long, keeping line
 * and column up to date, then handle a parameter entity reference
 * starting at the new position.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: it receives the length of the character read */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store the character v at index i of buffer b and
 * advance i; a plain byte store when l is 1, xmlCopyCharMultiByte()
 * otherwise. i must be an lvalue. The expansion is an unterminated
 * if/else statement, the caller supplies the final semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002081
2082/**
2083 * xmlSkipBlankChars:
2084 * @ctxt: the XML parser context
2085 *
2086 * skip all blanks character found at that point in the input streams.
2087 * It pops up finished entities in the process if allowable at that point.
2088 *
2089 * Returns the number of space chars skipped
2090 */
2091
2092int
2093xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002094 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002095
2096 /*
2097 * It's Okay to use CUR/NEXT here since all the blanks are on
2098 * the ASCII range.
2099 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002100 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2101 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002102 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002103 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002104 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002105 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002106 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002107 if (*cur == '\n') {
2108 ctxt->input->line++; ctxt->input->col = 1;
2109 }
2110 cur++;
2111 res++;
2112 if (*cur == 0) {
2113 ctxt->input->cur = cur;
2114 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115 cur = ctxt->input->cur;
2116 }
2117 }
2118 ctxt->input->cur = cur;
2119 } else {
2120 int cur;
2121 do {
2122 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002123 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002124 NEXT;
2125 cur = CUR;
2126 res++;
2127 }
2128 while ((cur == 0) && (ctxt->inputNr > 1) &&
2129 (ctxt->instate != XML_PARSER_COMMENT)) {
2130 xmlPopInput(ctxt);
2131 cur = CUR;
2132 }
2133 /*
2134 * Need to handle support of entities branching here
2135 */
2136 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2138 }
Owen Taylor3473f882001-02-23 17:55:21 +00002139 return(res);
2140}
2141
2142/************************************************************************
2143 * *
2144 * Commodity functions to handle entities *
2145 * *
2146 ************************************************************************/
2147
2148/**
2149 * xmlPopInput:
2150 * @ctxt: an XML parser context
2151 *
2152 * xmlPopInput: the current input pointed by ctxt->input came to an end
2153 * pop it and return the next char.
2154 *
2155 * Returns the current xmlChar in the parser context
2156 */
2157xmlChar
2158xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002159 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 if (xmlParserDebugEntities)
2161 xmlGenericError(xmlGenericErrorContext,
2162 "Popping input %d\n", ctxt->inputNr);
2163 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002164 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002165 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166 return(xmlPopInput(ctxt));
2167 return(CUR);
2168}
2169
2170/**
2171 * xmlPushInput:
2172 * @ctxt: an XML parser context
2173 * @input: an XML parser input fragment (entity, XML fragment ...).
2174 *
2175 * xmlPushInput: switch to a new input stream which is stacked on top
2176 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002177 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002178 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002179int
Owen Taylor3473f882001-02-23 17:55:21 +00002180xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002181 int ret;
2182 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002183
2184 if (xmlParserDebugEntities) {
2185 if ((ctxt->input != NULL) && (ctxt->input->filename))
2186 xmlGenericError(xmlGenericErrorContext,
2187 "%s(%d): ", ctxt->input->filename,
2188 ctxt->input->line);
2189 xmlGenericError(xmlGenericErrorContext,
2190 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2191 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002192 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002193 if (ctxt->instate == XML_PARSER_EOF)
2194 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002195 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002196 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002197}
2198
2199/**
2200 * xmlParseCharRef:
2201 * @ctxt: an XML parser context
2202 *
2203 * parse Reference declarations
2204 *
2205 * [66] CharRef ::= '&#' [0-9]+ ';' |
2206 * '&#x' [0-9a-fA-F]+ ';'
2207 *
2208 * [ WFC: Legal Character ]
2209 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002210 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002211 *
2212 * Returns the value parsed (as an int), 0 in case of error
2213 */
2214int
2215xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002216 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002217 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002218 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002219
Owen Taylor3473f882001-02-23 17:55:21 +00002220 /*
2221 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2222 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002223 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002224 (NXT(2) == 'x')) {
2225 SKIP(3);
2226 GROW;
2227 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002228 if (count++ > 20) {
2229 count = 0;
2230 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002231 if (ctxt->instate == XML_PARSER_EOF)
2232 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002233 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002234 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002235 val = val * 16 + (CUR - '0');
2236 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237 val = val * 16 + (CUR - 'a') + 10;
2238 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239 val = val * 16 + (CUR - 'A') + 10;
2240 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002241 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 val = 0;
2243 break;
2244 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002245 if (val > 0x10FFFF)
2246 outofrange = val;
2247
Owen Taylor3473f882001-02-23 17:55:21 +00002248 NEXT;
2249 count++;
2250 }
2251 if (RAW == ';') {
2252 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002253 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002254 ctxt->nbChars ++;
2255 ctxt->input->cur++;
2256 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002257 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002258 SKIP(2);
2259 GROW;
2260 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002261 if (count++ > 20) {
2262 count = 0;
2263 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002264 if (ctxt->instate == XML_PARSER_EOF)
2265 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002266 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002267 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002268 val = val * 10 + (CUR - '0');
2269 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002271 val = 0;
2272 break;
2273 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002274 if (val > 0x10FFFF)
2275 outofrange = val;
2276
Owen Taylor3473f882001-02-23 17:55:21 +00002277 NEXT;
2278 count++;
2279 }
2280 if (RAW == ';') {
2281 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002282 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002283 ctxt->nbChars ++;
2284 ctxt->input->cur++;
2285 }
2286 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002287 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002288 }
2289
2290 /*
2291 * [ WFC: Legal Character ]
2292 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002293 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002294 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002295 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return(val);
2297 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002298 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299 "xmlParseCharRef: invalid xmlChar value %d\n",
2300 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002301 }
2302 return(0);
2303}
2304
2305/**
2306 * xmlParseStringCharRef:
2307 * @ctxt: an XML parser context
2308 * @str: a pointer to an index in the string
2309 *
2310 * parse Reference declarations, variant parsing from a string rather
2311 * than an an input flow.
2312 *
2313 * [66] CharRef ::= '&#' [0-9]+ ';' |
2314 * '&#x' [0-9a-fA-F]+ ';'
2315 *
2316 * [ WFC: Legal Character ]
2317 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002318 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002319 *
2320 * Returns the value parsed (as an int), 0 in case of error, str will be
2321 * updated to the current value of the index
2322 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002323static int
Owen Taylor3473f882001-02-23 17:55:21 +00002324xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2325 const xmlChar *ptr;
2326 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002327 unsigned int val = 0;
2328 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002329
2330 if ((str == NULL) || (*str == NULL)) return(0);
2331 ptr = *str;
2332 cur = *ptr;
2333 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2334 ptr += 3;
2335 cur = *ptr;
2336 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002337 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002338 val = val * 16 + (cur - '0');
2339 else if ((cur >= 'a') && (cur <= 'f'))
2340 val = val * 16 + (cur - 'a') + 10;
2341 else if ((cur >= 'A') && (cur <= 'F'))
2342 val = val * 16 + (cur - 'A') + 10;
2343 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002344 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002345 val = 0;
2346 break;
2347 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002348 if (val > 0x10FFFF)
2349 outofrange = val;
2350
Owen Taylor3473f882001-02-23 17:55:21 +00002351 ptr++;
2352 cur = *ptr;
2353 }
2354 if (cur == ';')
2355 ptr++;
2356 } else if ((cur == '&') && (ptr[1] == '#')){
2357 ptr += 2;
2358 cur = *ptr;
2359 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002360 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002361 val = val * 10 + (cur - '0');
2362 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002363 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002364 val = 0;
2365 break;
2366 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002367 if (val > 0x10FFFF)
2368 outofrange = val;
2369
Owen Taylor3473f882001-02-23 17:55:21 +00002370 ptr++;
2371 cur = *ptr;
2372 }
2373 if (cur == ';')
2374 ptr++;
2375 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002376 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002377 return(0);
2378 }
2379 *str = ptr;
2380
2381 /*
2382 * [ WFC: Legal Character ]
2383 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002384 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002385 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002386 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002387 return(val);
2388 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002389 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2391 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002392 }
2393 return(0);
2394}
2395
2396/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002397 * xmlNewBlanksWrapperInputStream:
2398 * @ctxt: an XML parser context
2399 * @entity: an Entity pointer
2400 *
2401 * Create a new input stream for wrapping
2402 * blanks around a PEReference
2403 *
2404 * Returns the new input stream or NULL
2405 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002406
Daniel Veillardf5582f12002-06-11 10:08:16 +00002407static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002408
Daniel Veillardf4862f02002-09-10 11:13:43 +00002409static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002410xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411 xmlParserInputPtr input;
2412 xmlChar *buffer;
2413 size_t length;
2414 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002417 return(NULL);
2418 }
2419 if (xmlParserDebugEntities)
2420 xmlGenericError(xmlGenericErrorContext,
2421 "new blanks wrapper for entity: %s\n", entity->name);
2422 input = xmlNewInputStream(ctxt);
2423 if (input == NULL) {
2424 return(NULL);
2425 }
2426 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002427 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002428 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002429 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002430 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002431 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002432 }
2433 buffer [0] = ' ';
2434 buffer [1] = '%';
2435 buffer [length-3] = ';';
2436 buffer [length-2] = ' ';
2437 buffer [length-1] = 0;
2438 memcpy(buffer + 2, entity->name, length - 5);
2439 input->free = deallocblankswrapper;
2440 input->base = buffer;
2441 input->cur = buffer;
2442 input->length = length;
2443 input->end = &buffer[length];
2444 return(input);
2445}
2446
2447/**
Owen Taylor3473f882001-02-23 17:55:21 +00002448 * xmlParserHandlePEReference:
2449 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002450 *
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * [69] PEReference ::= '%' Name ';'
2452 *
2453 * [ WFC: No Recursion ]
2454 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002455 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002456 *
2457 * [ WFC: Entity Declared ]
2458 * In a document without any DTD, a document with only an internal DTD
2459 * subset which contains no parameter entity references, or a document
2460 * with "standalone='yes'", ... ... The declaration of a parameter
2461 * entity must precede any reference to it...
2462 *
2463 * [ VC: Entity Declared ]
2464 * In a document with an external subset or external parameter entities
2465 * with "standalone='no'", ... ... The declaration of a parameter entity
2466 * must precede any reference to it...
2467 *
2468 * [ WFC: In DTD ]
2469 * Parameter-entity references may only appear in the DTD.
2470 * NOTE: misleading but this is handled.
2471 *
2472 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002473 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002474 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002475 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002476 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002477 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002478 */
2479void
2480xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002481 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002482 xmlEntityPtr entity = NULL;
2483 xmlParserInputPtr input;
2484
Owen Taylor3473f882001-02-23 17:55:21 +00002485 if (RAW != '%') return;
2486 switch(ctxt->instate) {
2487 case XML_PARSER_CDATA_SECTION:
2488 return;
2489 case XML_PARSER_COMMENT:
2490 return;
2491 case XML_PARSER_START_TAG:
2492 return;
2493 case XML_PARSER_END_TAG:
2494 return;
2495 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002496 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 return;
2498 case XML_PARSER_PROLOG:
2499 case XML_PARSER_START:
2500 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002501 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002502 return;
2503 case XML_PARSER_ENTITY_DECL:
2504 case XML_PARSER_CONTENT:
2505 case XML_PARSER_ATTRIBUTE_VALUE:
2506 case XML_PARSER_PI:
2507 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002508 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002509 /* we just ignore it there */
2510 return;
2511 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002512 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002513 return;
2514 case XML_PARSER_ENTITY_VALUE:
2515 /*
2516 * NOTE: in the case of entity values, we don't do the
2517 * substitution here since we need the literal
2518 * entity value to be able to save the internal
2519 * subset of the document.
2520 * This will be handled by xmlStringDecodeEntities
2521 */
2522 return;
2523 case XML_PARSER_DTD:
2524 /*
2525 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526 * In the internal DTD subset, parameter-entity references
2527 * can occur only where markup declarations can occur, not
2528 * within markup declarations.
2529 * In that case this is handled in xmlParseMarkupDecl
2530 */
2531 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2532 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002533 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002534 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002535 break;
2536 case XML_PARSER_IGNORE:
2537 return;
2538 }
2539
2540 NEXT;
2541 name = xmlParseName(ctxt);
2542 if (xmlParserDebugEntities)
2543 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002544 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002545 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002546 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002547 } else {
2548 if (RAW == ';') {
2549 NEXT;
2550 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2552 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002553
Owen Taylor3473f882001-02-23 17:55:21 +00002554 /*
2555 * [ WFC: Entity Declared ]
2556 * In a document without any DTD, a document with only an
2557 * internal DTD subset which contains no parameter entity
2558 * references, or a document with "standalone='yes'", ...
2559 * ... The declaration of a parameter entity must precede
2560 * any reference to it...
2561 */
2562 if ((ctxt->standalone == 1) ||
2563 ((ctxt->hasExternalSubset == 0) &&
2564 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002565 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002566 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002567 } else {
2568 /*
2569 * [ VC: Entity Declared ]
2570 * In a document with an external subset or external
2571 * parameter entities with "standalone='no'", ...
2572 * ... The declaration of a parameter entity must precede
2573 * any reference to it...
2574 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002575 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2576 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2577 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002578 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002579 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002580 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2581 "PEReference: %%%s; not found\n",
2582 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002583 ctxt->valid = 0;
2584 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002585 } else if (ctxt->input->free != deallocblankswrapper) {
2586 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002587 if (xmlPushInput(ctxt, input) < 0)
2588 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002589 } else {
2590 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2591 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002592 xmlChar start[4];
2593 xmlCharEncoding enc;
2594
Owen Taylor3473f882001-02-23 17:55:21 +00002595 /*
2596 * handle the extra spaces added before and after
2597 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002598 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002599 */
2600 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002601 if (xmlPushInput(ctxt, input) < 0)
2602 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002603
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002604 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002605 * Get the 4 first bytes and decode the charset
2606 * if enc != XML_CHAR_ENCODING_NONE
2607 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002608 * Note that, since we may have some non-UTF8
2609 * encoding (like UTF16, bug 135229), the 'length'
2610 * is not known, but we can calculate based upon
2611 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002612 */
2613 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002614 if (ctxt->instate == XML_PARSER_EOF)
2615 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002616 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002617 start[0] = RAW;
2618 start[1] = NXT(1);
2619 start[2] = NXT(2);
2620 start[3] = NXT(3);
2621 enc = xmlDetectCharEncoding(start, 4);
2622 if (enc != XML_CHAR_ENCODING_NONE) {
2623 xmlSwitchEncoding(ctxt, enc);
2624 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002625 }
2626
Owen Taylor3473f882001-02-23 17:55:21 +00002627 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002628 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2629 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002630 xmlParseTextDecl(ctxt);
2631 }
Owen Taylor3473f882001-02-23 17:55:21 +00002632 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002633 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2634 "PEReference: %s is not a parameter entity\n",
2635 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002636 }
2637 }
2638 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002639 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002640 }
Owen Taylor3473f882001-02-23 17:55:21 +00002641 }
2642}
2643
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t variable holding the current
 * allocated size of buffer; the new size is twice that size plus n.
 * mem_error: is a label expected to exist in the calling function; it is
 * jumped to when the new size wraps around or when the reallocation
 * fails. In both cases buffer and buffer##_size are left untouched.
 * The argument n is parenthesized so that any expression can be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2658
2659/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002660 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002661 * @ctxt: the parser context
2662 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002663 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002664 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2665 * @end: an end marker xmlChar, 0 if none
2666 * @end2: an end marker xmlChar, 0 if none
2667 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002668 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002669 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002670 *
2671 * [67] Reference ::= EntityRef | CharRef
2672 *
2673 * [69] PEReference ::= '%' Name ';'
2674 *
2675 * Returns A newly allocated string with the substitution done. The caller
2676 * must deallocate it !
2677 */
2678xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002679xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2680 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002681 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002682 size_t buffer_size = 0;
2683 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002684
2685 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002686 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002687 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002688 xmlEntityPtr ent;
2689 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002690
Daniel Veillarda82b1822004-11-08 16:24:57 +00002691 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002692 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002693 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002694
Daniel Veillard0161e632008-08-28 15:36:32 +00002695 if (((ctxt->depth > 40) &&
2696 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2697 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002698 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002699 return(NULL);
2700 }
2701
2702 /*
2703 * allocate a translation buffer.
2704 */
2705 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002706 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002707 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002708
2709 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002710 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002711 * we are operating on already parsed values.
2712 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002713 if (str < last)
2714 c = CUR_SCHAR(str, l);
2715 else
2716 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002717 while ((c != 0) && (c != end) && /* non input consuming loop */
2718 (c != end2) && (c != end3)) {
2719
2720 if (c == 0) break;
2721 if ((c == '&') && (str[1] == '#')) {
2722 int val = xmlParseStringCharRef(ctxt, &str);
2723 if (val != 0) {
2724 COPY_BUF(0,buffer,nbchars,val);
2725 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002726 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002727 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002728 }
Owen Taylor3473f882001-02-23 17:55:21 +00002729 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2730 if (xmlParserDebugEntities)
2731 xmlGenericError(xmlGenericErrorContext,
2732 "String decoding Entity Reference: %.30s\n",
2733 str);
2734 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002735 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2736 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002737 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002738 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002739 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002740 if ((ent != NULL) &&
2741 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2742 if (ent->content != NULL) {
2743 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002744 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002745 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002746 }
Owen Taylor3473f882001-02-23 17:55:21 +00002747 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002748 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2749 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002750 }
2751 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002752 ctxt->depth++;
2753 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2754 0, 0, 0);
2755 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002756
Owen Taylor3473f882001-02-23 17:55:21 +00002757 if (rep != NULL) {
2758 current = rep;
2759 while (*current != 0) { /* non input consuming loop */
2760 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002761 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002762 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002763 goto int_error;
2764 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002765 }
2766 }
2767 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002768 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002769 }
2770 } else if (ent != NULL) {
2771 int i = xmlStrlen(ent->name);
2772 const xmlChar *cur = ent->name;
2773
2774 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002775 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002776 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002777 }
2778 for (;i > 0;i--)
2779 buffer[nbchars++] = *cur++;
2780 buffer[nbchars++] = ';';
2781 }
2782 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2783 if (xmlParserDebugEntities)
2784 xmlGenericError(xmlGenericErrorContext,
2785 "String decoding PE Reference: %.30s\n", str);
2786 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002787 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2788 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002789 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002790 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002791 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002792 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002793 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794 }
Owen Taylor3473f882001-02-23 17:55:21 +00002795 ctxt->depth++;
2796 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2797 0, 0, 0);
2798 ctxt->depth--;
2799 if (rep != NULL) {
2800 current = rep;
2801 while (*current != 0) { /* non input consuming loop */
2802 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002803 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002804 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002805 goto int_error;
2806 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002807 }
2808 }
2809 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002810 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002811 }
2812 }
2813 } else {
2814 COPY_BUF(l,buffer,nbchars,c);
2815 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002816 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2817 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002818 }
2819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002820 if (str < last)
2821 c = CUR_SCHAR(str, l);
2822 else
2823 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002824 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002825 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002826 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002827
2828mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002829 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002830int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002831 if (rep != NULL)
2832 xmlFree(rep);
2833 if (buffer != NULL)
2834 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002835 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002836}
2837
Daniel Veillarde57ec792003-09-10 10:50:59 +00002838/**
2839 * xmlStringDecodeEntities:
2840 * @ctxt: the parser context
2841 * @str: the input string
2842 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2843 * @end: an end marker xmlChar, 0 if none
2844 * @end2: an end marker xmlChar, 0 if none
2845 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002846 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002847 * Takes a entity string content and process to do the adequate substitutions.
2848 *
2849 * [67] Reference ::= EntityRef | CharRef
2850 *
2851 * [69] PEReference ::= '%' Name ';'
2852 *
2853 * Returns A newly allocated string with the substitution done. The caller
2854 * must deallocate it !
2855 */
2856xmlChar *
2857xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2858 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002859 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002860 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2861 end, end2, end3));
2862}
Owen Taylor3473f882001-02-23 17:55:21 +00002863
2864/************************************************************************
2865 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002866 * Commodity functions, cleanup needed ? *
2867 * *
2868 ************************************************************************/
2869
2870/**
2871 * areBlanks:
2872 * @ctxt: an XML parser context
2873 * @str: a xmlChar *
2874 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002875 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002876 *
2877 * Is this a sequence of blank chars that one can ignore ?
2878 *
2879 * Returns 1 if ignorable 0 otherwise.
2880 */
2881
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002882static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2883 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002884 int i, ret;
2885 xmlNodePtr lastChild;
2886
Daniel Veillard05c13a22001-09-09 08:38:09 +00002887 /*
2888 * Don't spend time trying to differentiate them, the same callback is
2889 * used !
2890 */
2891 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002892 return(0);
2893
Owen Taylor3473f882001-02-23 17:55:21 +00002894 /*
2895 * Check for xml:space value.
2896 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002897 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2898 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002899 return(0);
2900
2901 /*
2902 * Check that the string is made of blanks
2903 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002904 if (blank_chars == 0) {
2905 for (i = 0;i < len;i++)
2906 if (!(IS_BLANK_CH(str[i]))) return(0);
2907 }
Owen Taylor3473f882001-02-23 17:55:21 +00002908
2909 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002910 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002911 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002912 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002913 if (ctxt->myDoc != NULL) {
2914 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2915 if (ret == 0) return(1);
2916 if (ret == 1) return(0);
2917 }
2918
2919 /*
2920 * Otherwise, heuristic :-\
2921 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002922 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002923 if ((ctxt->node->children == NULL) &&
2924 (RAW == '<') && (NXT(1) == '/')) return(0);
2925
2926 lastChild = xmlGetLastChild(ctxt->node);
2927 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002928 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2929 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002930 } else if (xmlNodeIsText(lastChild))
2931 return(0);
2932 else if ((ctxt->node->children != NULL) &&
2933 (xmlNodeIsText(ctxt->node->children)))
2934 return(0);
2935 return(1);
2936}
2937
Owen Taylor3473f882001-02-23 17:55:21 +00002938/************************************************************************
2939 * *
2940 * Extra stuff for namespace support *
2941 * Relates to http://www.w3.org/TR/WD-xml-names *
2942 * *
2943 ************************************************************************/
2944
2945/**
2946 * xmlSplitQName:
2947 * @ctxt: an XML parser context
2948 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002949 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002950 *
2951 * parse an UTF8 encoded XML qualified name string
2952 *
2953 * [NS 5] QName ::= (Prefix ':')? LocalPart
2954 *
2955 * [NS 6] Prefix ::= NCName
2956 *
2957 * [NS 7] LocalPart ::= NCName
2958 *
2959 * Returns the local part, and prefix is updated
2960 * to get the Prefix if any.
2961 */
2962
2963xmlChar *
2964xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2965 xmlChar buf[XML_MAX_NAMELEN + 5];
2966 xmlChar *buffer = NULL;
2967 int len = 0;
2968 int max = XML_MAX_NAMELEN;
2969 xmlChar *ret = NULL;
2970 const xmlChar *cur = name;
2971 int c;
2972
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002973 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002974 *prefix = NULL;
2975
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002976 if (cur == NULL) return(NULL);
2977
Owen Taylor3473f882001-02-23 17:55:21 +00002978#ifndef XML_XML_NAMESPACE
2979 /* xml: prefix is not really a namespace */
2980 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2981 (cur[2] == 'l') && (cur[3] == ':'))
2982 return(xmlStrdup(name));
2983#endif
2984
Daniel Veillard597bc482003-07-24 16:08:28 +00002985 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002986 if (cur[0] == ':')
2987 return(xmlStrdup(name));
2988
2989 c = *cur++;
2990 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2991 buf[len++] = c;
2992 c = *cur++;
2993 }
2994 if (len >= max) {
2995 /*
2996 * Okay someone managed to make a huge name, so he's ready to pay
2997 * for the processing speed.
2998 */
2999 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003000
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003001 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003002 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003003 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003004 return(NULL);
3005 }
3006 memcpy(buffer, buf, len);
3007 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3008 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003009 xmlChar *tmp;
3010
Owen Taylor3473f882001-02-23 17:55:21 +00003011 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003012 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003013 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003014 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003015 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003016 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003017 return(NULL);
3018 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003020 }
3021 buffer[len++] = c;
3022 c = *cur++;
3023 }
3024 buffer[len] = 0;
3025 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003026
Daniel Veillard597bc482003-07-24 16:08:28 +00003027 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003028 if (buffer != NULL)
3029 xmlFree(buffer);
3030 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003031 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003032 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003033
Owen Taylor3473f882001-02-23 17:55:21 +00003034 if (buffer == NULL)
3035 ret = xmlStrndup(buf, len);
3036 else {
3037 ret = buffer;
3038 buffer = NULL;
3039 max = XML_MAX_NAMELEN;
3040 }
3041
3042
3043 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003044 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003045 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003046 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003047 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003048 }
Owen Taylor3473f882001-02-23 17:55:21 +00003049 len = 0;
3050
Daniel Veillardbb284f42002-10-16 18:02:47 +00003051 /*
3052 * Check that the first character is proper to start
3053 * a new name
3054 */
3055 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3056 ((c >= 0x41) && (c <= 0x5A)) ||
3057 (c == '_') || (c == ':'))) {
3058 int l;
3059 int first = CUR_SCHAR(cur, l);
3060
3061 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003062 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003063 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003064 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003065 }
3066 }
3067 cur++;
3068
Owen Taylor3473f882001-02-23 17:55:21 +00003069 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3070 buf[len++] = c;
3071 c = *cur++;
3072 }
3073 if (len >= max) {
3074 /*
3075 * Okay someone managed to make a huge name, so he's ready to pay
3076 * for the processing speed.
3077 */
3078 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003079
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003080 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003082 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003083 return(NULL);
3084 }
3085 memcpy(buffer, buf, len);
3086 while (c != 0) { /* tested bigname2.xml */
3087 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003088 xmlChar *tmp;
3089
Owen Taylor3473f882001-02-23 17:55:21 +00003090 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003091 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003092 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003093 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003094 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003095 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003096 return(NULL);
3097 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003098 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003099 }
3100 buffer[len++] = c;
3101 c = *cur++;
3102 }
3103 buffer[len] = 0;
3104 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003105
Owen Taylor3473f882001-02-23 17:55:21 +00003106 if (buffer == NULL)
3107 ret = xmlStrndup(buf, len);
3108 else {
3109 ret = buffer;
3110 }
3111 }
3112
3113 return(ret);
3114}
3115
3116/************************************************************************
3117 * *
3118 * The parser itself *
3119 * Relates to http://www.w3.org/TR/REC-xml *
3120 * *
3121 ************************************************************************/
3122
Daniel Veillard34e3f642008-07-29 09:02:27 +00003123/************************************************************************
3124 * *
3125 * Routines to parse Name, NCName and NmToken *
3126 * *
3127 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only present in DEBUG
 * builds. Each one is incremented on entry of the routine it is named
 * after; static storage makes them start at zero.
 */
static unsigned long nbParseName,
                     nbParseNmToken,
                     nbParseNCName,
                     nbParseNCNameComplex,
                     nbParseNameComplex,
                     nbParseStringName;
#endif
3136
Daniel Veillard34e3f642008-07-29 09:02:27 +00003137/*
3138 * The two following functions are related to the change of accepted
3139 * characters for Name and NmToken in the Revision 5 of XML-1.0
3140 * They correspond to the modified production [4] and the new production [4a]
3141 * changes in that revision. Also note that the macros used for the
3142 * productions Letter, Digit, CombiningChar and Extender are not needed
3143 * anymore.
3144 * We still keep compatibility to pre-revision5 parsing semantic if the
3145 * new XML_PARSE_OLD10 option is given to the parser.
3146 */
3147static int
3148xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3149 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3150 /*
3151 * Use the new checks of production [4] [4a] amd [5] of the
3152 * Update 5 of XML-1.0
3153 */
3154 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3155 (((c >= 'a') && (c <= 'z')) ||
3156 ((c >= 'A') && (c <= 'Z')) ||
3157 (c == '_') || (c == ':') ||
3158 ((c >= 0xC0) && (c <= 0xD6)) ||
3159 ((c >= 0xD8) && (c <= 0xF6)) ||
3160 ((c >= 0xF8) && (c <= 0x2FF)) ||
3161 ((c >= 0x370) && (c <= 0x37D)) ||
3162 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3163 ((c >= 0x200C) && (c <= 0x200D)) ||
3164 ((c >= 0x2070) && (c <= 0x218F)) ||
3165 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3166 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3167 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3168 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3169 ((c >= 0x10000) && (c <= 0xEFFFF))))
3170 return(1);
3171 } else {
3172 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3173 return(1);
3174 }
3175 return(0);
3176}
3177
3178static int
3179xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3180 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3181 /*
3182 * Use the new checks of production [4] [4a] amd [5] of the
3183 * Update 5 of XML-1.0
3184 */
3185 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3186 (((c >= 'a') && (c <= 'z')) ||
3187 ((c >= 'A') && (c <= 'Z')) ||
3188 ((c >= '0') && (c <= '9')) || /* !start */
3189 (c == '_') || (c == ':') ||
3190 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3191 ((c >= 0xC0) && (c <= 0xD6)) ||
3192 ((c >= 0xD8) && (c <= 0xF6)) ||
3193 ((c >= 0xF8) && (c <= 0x2FF)) ||
3194 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3195 ((c >= 0x370) && (c <= 0x37D)) ||
3196 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3197 ((c >= 0x200C) && (c <= 0x200D)) ||
3198 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3199 ((c >= 0x2070) && (c <= 0x218F)) ||
3200 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3201 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3202 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3203 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3204 ((c >= 0x10000) && (c <= 0xEFFFF))))
3205 return(1);
3206 } else {
3207 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3208 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003209 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003210 (IS_COMBINING(c)) ||
3211 (IS_EXTENDER(c)))
3212 return(1);
3213 }
3214 return(0);
3215}
3216
Daniel Veillarde57ec792003-09-10 10:50:59 +00003217static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003218 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003219
Daniel Veillard34e3f642008-07-29 09:02:27 +00003220static const xmlChar *
3221xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3222 int len = 0, l;
3223 int c;
3224 int count = 0;
3225
Daniel Veillardc6561462009-03-25 10:22:31 +00003226#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003227 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003228#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003229
3230 /*
3231 * Handler for more complex cases
3232 */
3233 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003234 if (ctxt->instate == XML_PARSER_EOF)
3235 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003236 c = CUR_CHAR(l);
3237 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3238 /*
3239 * Use the new checks of production [4] [4a] amd [5] of the
3240 * Update 5 of XML-1.0
3241 */
3242 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3243 (!(((c >= 'a') && (c <= 'z')) ||
3244 ((c >= 'A') && (c <= 'Z')) ||
3245 (c == '_') || (c == ':') ||
3246 ((c >= 0xC0) && (c <= 0xD6)) ||
3247 ((c >= 0xD8) && (c <= 0xF6)) ||
3248 ((c >= 0xF8) && (c <= 0x2FF)) ||
3249 ((c >= 0x370) && (c <= 0x37D)) ||
3250 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3251 ((c >= 0x200C) && (c <= 0x200D)) ||
3252 ((c >= 0x2070) && (c <= 0x218F)) ||
3253 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3254 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3255 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3256 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3257 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3258 return(NULL);
3259 }
3260 len += l;
3261 NEXTL(l);
3262 c = CUR_CHAR(l);
3263 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3264 (((c >= 'a') && (c <= 'z')) ||
3265 ((c >= 'A') && (c <= 'Z')) ||
3266 ((c >= '0') && (c <= '9')) || /* !start */
3267 (c == '_') || (c == ':') ||
3268 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3269 ((c >= 0xC0) && (c <= 0xD6)) ||
3270 ((c >= 0xD8) && (c <= 0xF6)) ||
3271 ((c >= 0xF8) && (c <= 0x2FF)) ||
3272 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3273 ((c >= 0x370) && (c <= 0x37D)) ||
3274 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3275 ((c >= 0x200C) && (c <= 0x200D)) ||
3276 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3277 ((c >= 0x2070) && (c <= 0x218F)) ||
3278 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3279 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3280 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3281 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3282 ((c >= 0x10000) && (c <= 0xEFFFF))
3283 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003284 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003285 count = 0;
3286 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003287 if (ctxt->instate == XML_PARSER_EOF)
3288 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003289 }
3290 len += l;
3291 NEXTL(l);
3292 c = CUR_CHAR(l);
3293 }
3294 } else {
3295 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 (!IS_LETTER(c) && (c != '_') &&
3297 (c != ':'))) {
3298 return(NULL);
3299 }
3300 len += l;
3301 NEXTL(l);
3302 c = CUR_CHAR(l);
3303
3304 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003307 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003308 (IS_COMBINING(c)) ||
3309 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003310 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003311 count = 0;
3312 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003313 if (ctxt->instate == XML_PARSER_EOF)
3314 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003315 }
3316 len += l;
3317 NEXTL(l);
3318 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003319 if (c == 0) {
3320 count = 0;
3321 GROW;
3322 if (ctxt->instate == XML_PARSER_EOF)
3323 return(NULL);
3324 c = CUR_CHAR(l);
3325 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003326 }
3327 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003328 if ((len > XML_MAX_NAME_LENGTH) &&
3329 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3330 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3331 return(NULL);
3332 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003333 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3334 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3335 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3336}
3337
Owen Taylor3473f882001-02-23 17:55:21 +00003338/**
3339 * xmlParseName:
3340 * @ctxt: an XML parser context
3341 *
3342 * parse an XML name.
3343 *
3344 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3345 * CombiningChar | Extender
3346 *
3347 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3348 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003349 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003350 *
3351 * Returns the Name parsed or NULL
3352 */
3353
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003354const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003355xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003356 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003357 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003358 int count = 0;
3359
3360 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003361
Daniel Veillardc6561462009-03-25 10:22:31 +00003362#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003363 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003364#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003365
Daniel Veillard48b2f892001-02-25 16:11:03 +00003366 /*
3367 * Accelerator for simple ASCII names
3368 */
3369 in = ctxt->input->cur;
3370 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371 ((*in >= 0x41) && (*in <= 0x5A)) ||
3372 (*in == '_') || (*in == ':')) {
3373 in++;
3374 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375 ((*in >= 0x41) && (*in <= 0x5A)) ||
3376 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003377 (*in == '_') || (*in == '-') ||
3378 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003379 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003380 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003381 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003382 if ((count > XML_MAX_NAME_LENGTH) &&
3383 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3384 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3385 return(NULL);
3386 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003387 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003388 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003389 ctxt->nbChars += count;
3390 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003391 if (ret == NULL)
3392 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003393 return(ret);
3394 }
3395 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003396 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003397 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003398}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003399
Daniel Veillard34e3f642008-07-29 09:02:27 +00003400static const xmlChar *
3401xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402 int len = 0, l;
3403 int c;
3404 int count = 0;
3405
Daniel Veillardc6561462009-03-25 10:22:31 +00003406#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003407 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003408#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003409
3410 /*
3411 * Handler for more complex cases
3412 */
3413 GROW;
3414 c = CUR_CHAR(l);
3415 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3416 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3417 return(NULL);
3418 }
3419
3420 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3421 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003422 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003423 if ((len > XML_MAX_NAME_LENGTH) &&
3424 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3425 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3426 return(NULL);
3427 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003428 count = 0;
3429 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003430 if (ctxt->instate == XML_PARSER_EOF)
3431 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 }
3433 len += l;
3434 NEXTL(l);
3435 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003436 if (c == 0) {
3437 count = 0;
3438 GROW;
3439 if (ctxt->instate == XML_PARSER_EOF)
3440 return(NULL);
3441 c = CUR_CHAR(l);
3442 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003443 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003444 if ((len > XML_MAX_NAME_LENGTH) &&
3445 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3446 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3447 return(NULL);
3448 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003449 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3450}
3451
3452/**
3453 * xmlParseNCName:
3454 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003455 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003456 *
3457 * parse an XML name.
3458 *
3459 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3460 * CombiningChar | Extender
3461 *
3462 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3463 *
3464 * Returns the Name parsed or NULL
3465 */
3466
3467static const xmlChar *
3468xmlParseNCName(xmlParserCtxtPtr ctxt) {
3469 const xmlChar *in;
3470 const xmlChar *ret;
3471 int count = 0;
3472
Daniel Veillardc6561462009-03-25 10:22:31 +00003473#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003474 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003475#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003476
3477 /*
3478 * Accelerator for simple ASCII names
3479 */
3480 in = ctxt->input->cur;
3481 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3482 ((*in >= 0x41) && (*in <= 0x5A)) ||
3483 (*in == '_')) {
3484 in++;
3485 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3486 ((*in >= 0x41) && (*in <= 0x5A)) ||
3487 ((*in >= 0x30) && (*in <= 0x39)) ||
3488 (*in == '_') || (*in == '-') ||
3489 (*in == '.'))
3490 in++;
3491 if ((*in > 0) && (*in < 0x80)) {
3492 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003493 if ((count > XML_MAX_NAME_LENGTH) &&
3494 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3495 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3496 return(NULL);
3497 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003498 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3499 ctxt->input->cur = in;
3500 ctxt->nbChars += count;
3501 ctxt->input->col += count;
3502 if (ret == NULL) {
3503 xmlErrMemory(ctxt, NULL);
3504 }
3505 return(ret);
3506 }
3507 }
3508 return(xmlParseNCNameComplex(ctxt));
3509}
3510
Daniel Veillard46de64e2002-05-29 08:21:33 +00003511/**
3512 * xmlParseNameAndCompare:
3513 * @ctxt: an XML parser context
3514 *
3515 * parse an XML name and compares for match
3516 * (specialized for endtag parsing)
3517 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003518 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3519 * and the name for mismatch
3520 */
3521
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003522static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003523xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003524 register const xmlChar *cmp = other;
3525 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003526 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003527
3528 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003529 if (ctxt->instate == XML_PARSER_EOF)
3530 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003531
Daniel Veillard46de64e2002-05-29 08:21:33 +00003532 in = ctxt->input->cur;
3533 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003534 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003535 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003536 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003537 }
William M. Brack76e95df2003-10-18 16:20:14 +00003538 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003539 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003540 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003541 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003542 }
3543 /* failure (or end of input buffer), check with full function */
3544 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003545 /* strings coming from the dictionnary direct compare possible */
3546 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003547 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003548 }
3549 return ret;
3550}
3551
Owen Taylor3473f882001-02-23 17:55:21 +00003552/**
3553 * xmlParseStringName:
3554 * @ctxt: an XML parser context
3555 * @str: a pointer to the string pointer (IN/OUT)
3556 *
3557 * parse an XML name.
3558 *
3559 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3560 * CombiningChar | Extender
3561 *
3562 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3563 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003564 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003565 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003566 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003567 * is updated to the current location in the string.
3568 */
3569
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003570static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003571xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3572 xmlChar buf[XML_MAX_NAMELEN + 5];
3573 const xmlChar *cur = *str;
3574 int len = 0, l;
3575 int c;
3576
Daniel Veillardc6561462009-03-25 10:22:31 +00003577#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003578 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003579#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003580
Owen Taylor3473f882001-02-23 17:55:21 +00003581 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003582 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003583 return(NULL);
3584 }
3585
Daniel Veillard34e3f642008-07-29 09:02:27 +00003586 COPY_BUF(l,buf,len,c);
3587 cur += l;
3588 c = CUR_SCHAR(cur, l);
3589 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003590 COPY_BUF(l,buf,len,c);
3591 cur += l;
3592 c = CUR_SCHAR(cur, l);
3593 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3594 /*
3595 * Okay someone managed to make a huge name, so he's ready to pay
3596 * for the processing speed.
3597 */
3598 xmlChar *buffer;
3599 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003600
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003601 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003602 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003603 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003604 return(NULL);
3605 }
3606 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003607 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003608 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003609 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003610
3611 if ((len > XML_MAX_NAME_LENGTH) &&
3612 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3613 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614 xmlFree(buffer);
3615 return(NULL);
3616 }
Owen Taylor3473f882001-02-23 17:55:21 +00003617 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003618 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003619 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003620 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003621 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003622 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003623 return(NULL);
3624 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003625 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003626 }
3627 COPY_BUF(l,buffer,len,c);
3628 cur += l;
3629 c = CUR_SCHAR(cur, l);
3630 }
3631 buffer[len] = 0;
3632 *str = cur;
3633 return(buffer);
3634 }
3635 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003636 if ((len > XML_MAX_NAME_LENGTH) &&
3637 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3638 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3639 return(NULL);
3640 }
Owen Taylor3473f882001-02-23 17:55:21 +00003641 *str = cur;
3642 return(xmlStrndup(buf, len));
3643}
3644
3645/**
3646 * xmlParseNmtoken:
3647 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003648 *
Owen Taylor3473f882001-02-23 17:55:21 +00003649 * parse an XML Nmtoken.
3650 *
3651 * [7] Nmtoken ::= (NameChar)+
3652 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003653 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003654 *
3655 * Returns the Nmtoken parsed or NULL
3656 */
3657
3658xmlChar *
3659xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3660 xmlChar buf[XML_MAX_NAMELEN + 5];
3661 int len = 0, l;
3662 int c;
3663 int count = 0;
3664
Daniel Veillardc6561462009-03-25 10:22:31 +00003665#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003666 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003667#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003668
Owen Taylor3473f882001-02-23 17:55:21 +00003669 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003670 if (ctxt->instate == XML_PARSER_EOF)
3671 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003672 c = CUR_CHAR(l);
3673
Daniel Veillard34e3f642008-07-29 09:02:27 +00003674 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003675 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003676 count = 0;
3677 GROW;
3678 }
3679 COPY_BUF(l,buf,len,c);
3680 NEXTL(l);
3681 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003682 if (c == 0) {
3683 count = 0;
3684 GROW;
3685 if (ctxt->instate == XML_PARSER_EOF)
3686 return(NULL);
3687 c = CUR_CHAR(l);
3688 }
Owen Taylor3473f882001-02-23 17:55:21 +00003689 if (len >= XML_MAX_NAMELEN) {
3690 /*
3691 * Okay someone managed to make a huge token, so he's ready to pay
3692 * for the processing speed.
3693 */
3694 xmlChar *buffer;
3695 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003696
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003697 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003698 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003699 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003700 return(NULL);
3701 }
3702 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003703 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003704 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003705 count = 0;
3706 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003707 if (ctxt->instate == XML_PARSER_EOF) {
3708 xmlFree(buffer);
3709 return(NULL);
3710 }
Owen Taylor3473f882001-02-23 17:55:21 +00003711 }
3712 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003713 xmlChar *tmp;
3714
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003715 if ((max > XML_MAX_NAME_LENGTH) &&
3716 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3717 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3718 xmlFree(buffer);
3719 return(NULL);
3720 }
Owen Taylor3473f882001-02-23 17:55:21 +00003721 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003722 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003723 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003724 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003725 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003726 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003727 return(NULL);
3728 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003729 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003730 }
3731 COPY_BUF(l,buffer,len,c);
3732 NEXTL(l);
3733 c = CUR_CHAR(l);
3734 }
3735 buffer[len] = 0;
3736 return(buffer);
3737 }
3738 }
3739 if (len == 0)
3740 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003741 if ((len > XML_MAX_NAME_LENGTH) &&
3742 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3743 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744 return(NULL);
3745 }
Owen Taylor3473f882001-02-23 17:55:21 +00003746 return(xmlStrndup(buf, len));
3747}
3748
3749/**
3750 * xmlParseEntityValue:
3751 * @ctxt: an XML parser context
3752 * @orig: if non-NULL store a copy of the original entity value
3753 *
3754 * parse a value for ENTITY declarations
3755 *
3756 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3757 * "'" ([^%&'] | PEReference | Reference)* "'"
3758 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003759 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003760 */
3761
3762xmlChar *
3763xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3764 xmlChar *buf = NULL;
3765 int len = 0;
3766 int size = XML_PARSER_BUFFER_SIZE;
3767 int c, l;
3768 xmlChar stop;
3769 xmlChar *ret = NULL;
3770 const xmlChar *cur = NULL;
3771 xmlParserInputPtr input;
3772
3773 if (RAW == '"') stop = '"';
3774 else if (RAW == '\'') stop = '\'';
3775 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003776 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003777 return(NULL);
3778 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003779 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003780 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003781 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 return(NULL);
3783 }
3784
3785 /*
3786 * The content of the entity definition is copied in a buffer.
3787 */
3788
3789 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3790 input = ctxt->input;
3791 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003792 if (ctxt->instate == XML_PARSER_EOF) {
3793 xmlFree(buf);
3794 return(NULL);
3795 }
Owen Taylor3473f882001-02-23 17:55:21 +00003796 NEXT;
3797 c = CUR_CHAR(l);
3798 /*
3799 * NOTE: 4.4.5 Included in Literal
3800 * When a parameter entity reference appears in a literal entity
3801 * value, ... a single or double quote character in the replacement
3802 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003803 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003804 * In practice it means we stop the loop only when back at parsing
3805 * the initial entity and the quote is found
3806 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003807 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3808 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003809 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003810 xmlChar *tmp;
3811
Owen Taylor3473f882001-02-23 17:55:21 +00003812 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003813 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3814 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003815 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003816 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003817 return(NULL);
3818 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003819 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003820 }
3821 COPY_BUF(l,buf,len,c);
3822 NEXTL(l);
3823 /*
3824 * Pop-up of finished entities.
3825 */
3826 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3827 xmlPopInput(ctxt);
3828
3829 GROW;
3830 c = CUR_CHAR(l);
3831 if (c == 0) {
3832 GROW;
3833 c = CUR_CHAR(l);
3834 }
3835 }
3836 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003837 if (ctxt->instate == XML_PARSER_EOF) {
3838 xmlFree(buf);
3839 return(NULL);
3840 }
Owen Taylor3473f882001-02-23 17:55:21 +00003841
3842 /*
3843 * Raise problem w.r.t. '&' and '%' being used in non-entities
3844 * reference constructs. Note Charref will be handled in
3845 * xmlStringDecodeEntities()
3846 */
3847 cur = buf;
3848 while (*cur != 0) { /* non input consuming */
3849 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3850 xmlChar *name;
3851 xmlChar tmp = *cur;
3852
3853 cur++;
3854 name = xmlParseStringName(ctxt, &cur);
3855 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003856 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003857 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003858 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003859 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003860 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3861 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003862 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003863 }
3864 if (name != NULL)
3865 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003866 if (*cur == 0)
3867 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
3869 cur++;
3870 }
3871
3872 /*
3873 * Then PEReference entities are substituted.
3874 */
3875 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003876 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003877 xmlFree(buf);
3878 } else {
3879 NEXT;
3880 /*
3881 * NOTE: 4.4.7 Bypassed
3882 * When a general entity reference appears in the EntityValue in
3883 * an entity declaration, it is bypassed and left as is.
3884 * so XML_SUBSTITUTE_REF is not set here.
3885 */
3886 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3887 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003888 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003889 *orig = buf;
3890 else
3891 xmlFree(buf);
3892 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003893
Owen Taylor3473f882001-02-23 17:55:21 +00003894 return(ret);
3895}
3896
3897/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003898 * xmlParseAttValueComplex:
3899 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003900 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003901 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003902 *
3903 * parse a value for an attribute, this is the fallback function
3904 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003905 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003906 *
3907 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3908 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003909static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003910xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003911 xmlChar limit = 0;
3912 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003913 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003914 size_t len = 0;
3915 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003916 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003917 xmlChar *current = NULL;
3918 xmlEntityPtr ent;
3919
Owen Taylor3473f882001-02-23 17:55:21 +00003920 if (NXT(0) == '"') {
3921 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3922 limit = '"';
3923 NEXT;
3924 } else if (NXT(0) == '\'') {
3925 limit = '\'';
3926 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3927 NEXT;
3928 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003929 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 return(NULL);
3931 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003932
Owen Taylor3473f882001-02-23 17:55:21 +00003933 /*
3934 * allocate a translation buffer.
3935 */
3936 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003937 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003938 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003939
3940 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003941 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003942 */
3943 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003944 while (((NXT(0) != limit) && /* checked */
3945 (IS_CHAR(c)) && (c != '<')) &&
3946 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003947 /*
3948 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3949 * special option is given
3950 */
3951 if ((len > XML_MAX_TEXT_LENGTH) &&
3952 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3953 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003954 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003955 goto mem_error;
3956 }
Owen Taylor3473f882001-02-23 17:55:21 +00003957 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003958 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003959 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003960 if (NXT(1) == '#') {
3961 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003962
Owen Taylor3473f882001-02-23 17:55:21 +00003963 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003964 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003965 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003966 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003967 }
3968 buf[len++] = '&';
3969 } else {
3970 /*
3971 * The reparsing will be done in xmlStringGetNodeList()
3972 * called by the attribute() function in SAX.c
3973 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003974 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003975 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003976 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003977 buf[len++] = '&';
3978 buf[len++] = '#';
3979 buf[len++] = '3';
3980 buf[len++] = '8';
3981 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003982 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003983 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003984 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003985 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003986 }
Owen Taylor3473f882001-02-23 17:55:21 +00003987 len += xmlCopyChar(0, &buf[len], val);
3988 }
3989 } else {
3990 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003991 ctxt->nbentities++;
3992 if (ent != NULL)
3993 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003994 if ((ent != NULL) &&
3995 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003996 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003997 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003998 }
3999 if ((ctxt->replaceEntities == 0) &&
4000 (ent->content[0] == '&')) {
4001 buf[len++] = '&';
4002 buf[len++] = '#';
4003 buf[len++] = '3';
4004 buf[len++] = '8';
4005 buf[len++] = ';';
4006 } else {
4007 buf[len++] = ent->content[0];
4008 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004009 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004010 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004011 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4012 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004013 XML_SUBSTITUTE_REF,
4014 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004015 if (rep != NULL) {
4016 current = rep;
4017 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004018 if ((*current == 0xD) || (*current == 0xA) ||
4019 (*current == 0x9)) {
4020 buf[len++] = 0x20;
4021 current++;
4022 } else
4023 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004024 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004025 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004026 }
4027 }
4028 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004029 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004030 }
4031 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004032 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004033 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004034 }
Owen Taylor3473f882001-02-23 17:55:21 +00004035 if (ent->content != NULL)
4036 buf[len++] = ent->content[0];
4037 }
4038 } else if (ent != NULL) {
4039 int i = xmlStrlen(ent->name);
4040 const xmlChar *cur = ent->name;
4041
4042 /*
4043 * This may look absurd but is needed to detect
4044 * entities problems
4045 */
4046 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004047 (ent->content != NULL) && (ent->checked == 0)) {
4048 unsigned long oldnbent = ctxt->nbentities;
4049
Owen Taylor3473f882001-02-23 17:55:21 +00004050 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004051 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004052
Daniel Veillardcff25462013-03-11 15:57:55 +08004053 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004054 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004055 if (xmlStrchr(rep, '<'))
4056 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004057 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004058 rep = NULL;
4059 }
Owen Taylor3473f882001-02-23 17:55:21 +00004060 }
4061
4062 /*
4063 * Just output the reference
4064 */
4065 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004066 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004067 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004068 }
4069 for (;i > 0;i--)
4070 buf[len++] = *cur++;
4071 buf[len++] = ';';
4072 }
4073 }
4074 } else {
4075 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004076 if ((len != 0) || (!normalize)) {
4077 if ((!normalize) || (!in_space)) {
4078 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004079 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004080 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004081 }
4082 }
4083 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004084 }
4085 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004086 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004087 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004088 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004089 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004090 }
4091 }
4092 NEXTL(l);
4093 }
4094 GROW;
4095 c = CUR_CHAR(l);
4096 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004097 if (ctxt->instate == XML_PARSER_EOF)
4098 goto error;
4099
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004100 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004101 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004102 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004103 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004104 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004105 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004106 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004107 if ((c != 0) && (!IS_CHAR(c))) {
4108 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4109 "invalid character in attribute value\n");
4110 } else {
4111 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4112 "AttValue: ' expected\n");
4113 }
Owen Taylor3473f882001-02-23 17:55:21 +00004114 } else
4115 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004116
4117 /*
4118 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004119 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004120 */
4121 if (len >= INT_MAX) {
4122 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004123 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004124 goto mem_error;
4125 }
4126
4127 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004128 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004129
4130mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004131 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004132error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004133 if (buf != NULL)
4134 xmlFree(buf);
4135 if (rep != NULL)
4136 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004137 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004138}
4139
4140/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004141 * xmlParseAttValue:
4142 * @ctxt: an XML parser context
4143 *
4144 * parse a value for an attribute
4145 * Note: the parser won't do substitution of entities here, this
4146 * will be handled later in xmlStringGetNodeList
4147 *
4148 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4149 * "'" ([^<&'] | Reference)* "'"
4150 *
4151 * 3.3.3 Attribute-Value Normalization:
4152 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004153 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004154 * - a character reference is processed by appending the referenced
4155 * character to the attribute value
4156 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004157 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004158 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4159 * appending #x20 to the normalized value, except that only a single
4160 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004161 * parsed entity or the literal entity value of an internal parsed entity
4162 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004163 * If the declared value is not CDATA, then the XML processor must further
4164 * process the normalized attribute value by discarding any leading and
4165 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004166 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004167 * All attributes for which no declaration has been read should be treated
4168 * by a non-validating parser as if declared CDATA.
4169 *
4170 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4171 */
4172
4173
4174xmlChar *
4175xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004176 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004177 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004178}
4179
4180/**
Owen Taylor3473f882001-02-23 17:55:21 +00004181 * xmlParseSystemLiteral:
4182 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004183 *
Owen Taylor3473f882001-02-23 17:55:21 +00004184 * parse an XML Literal
4185 *
4186 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4187 *
4188 * Returns the SystemLiteral parsed or NULL
4189 */
4190
4191xmlChar *
4192xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4193 xmlChar *buf = NULL;
4194 int len = 0;
4195 int size = XML_PARSER_BUFFER_SIZE;
4196 int cur, l;
4197 xmlChar stop;
4198 int state = ctxt->instate;
4199 int count = 0;
4200
4201 SHRINK;
4202 if (RAW == '"') {
4203 NEXT;
4204 stop = '"';
4205 } else if (RAW == '\'') {
4206 NEXT;
4207 stop = '\'';
4208 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004209 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004210 return(NULL);
4211 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004212
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004213 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004214 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004215 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004216 return(NULL);
4217 }
4218 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4219 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004220 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004221 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004222 xmlChar *tmp;
4223
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004224 if ((size > XML_MAX_NAME_LENGTH) &&
4225 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4226 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4227 xmlFree(buf);
4228 ctxt->instate = (xmlParserInputState) state;
4229 return(NULL);
4230 }
Owen Taylor3473f882001-02-23 17:55:21 +00004231 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004232 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4233 if (tmp == NULL) {
4234 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004235 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004236 ctxt->instate = (xmlParserInputState) state;
4237 return(NULL);
4238 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004239 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004240 }
4241 count++;
4242 if (count > 50) {
4243 GROW;
4244 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004245 if (ctxt->instate == XML_PARSER_EOF) {
4246 xmlFree(buf);
4247 return(NULL);
4248 }
Owen Taylor3473f882001-02-23 17:55:21 +00004249 }
4250 COPY_BUF(l,buf,len,cur);
4251 NEXTL(l);
4252 cur = CUR_CHAR(l);
4253 if (cur == 0) {
4254 GROW;
4255 SHRINK;
4256 cur = CUR_CHAR(l);
4257 }
4258 }
4259 buf[len] = 0;
4260 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004261 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004262 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004263 } else {
4264 NEXT;
4265 }
4266 return(buf);
4267}
4268
4269/**
4270 * xmlParsePubidLiteral:
4271 * @ctxt: an XML parser context
4272 *
4273 * parse an XML public literal
4274 *
4275 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4276 *
4277 * Returns the PubidLiteral parsed or NULL.
4278 */
4279
4280xmlChar *
4281xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4282 xmlChar *buf = NULL;
4283 int len = 0;
4284 int size = XML_PARSER_BUFFER_SIZE;
4285 xmlChar cur;
4286 xmlChar stop;
4287 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004288 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004289
4290 SHRINK;
4291 if (RAW == '"') {
4292 NEXT;
4293 stop = '"';
4294 } else if (RAW == '\'') {
4295 NEXT;
4296 stop = '\'';
4297 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004298 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004299 return(NULL);
4300 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004301 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004302 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004303 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004304 return(NULL);
4305 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004306 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004307 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004308 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004309 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004310 xmlChar *tmp;
4311
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004312 if ((size > XML_MAX_NAME_LENGTH) &&
4313 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4314 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4315 xmlFree(buf);
4316 return(NULL);
4317 }
Owen Taylor3473f882001-02-23 17:55:21 +00004318 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004319 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4320 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004321 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004322 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004323 return(NULL);
4324 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004325 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004326 }
4327 buf[len++] = cur;
4328 count++;
4329 if (count > 50) {
4330 GROW;
4331 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004332 if (ctxt->instate == XML_PARSER_EOF) {
4333 xmlFree(buf);
4334 return(NULL);
4335 }
Owen Taylor3473f882001-02-23 17:55:21 +00004336 }
4337 NEXT;
4338 cur = CUR;
4339 if (cur == 0) {
4340 GROW;
4341 SHRINK;
4342 cur = CUR;
4343 }
4344 }
4345 buf[len] = 0;
4346 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004347 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004348 } else {
4349 NEXT;
4350 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004351 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004352 return(buf);
4353}
4354
Daniel Veillard8ed10722009-08-20 19:17:36 +02004355static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004356
/*
 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by byte value.  An entry is either the byte value
 * itself or 0x00.  The zero entries are: every control character except
 * 0x09 (so 0x0A and 0x0D are zero as well), '&' (0x26), '<' (0x3C),
 * ']' (0x5D) and every byte from 0x80 upwards.
 * NOTE(review): the code consuming the table is not next to it;
 * presumably a zero entry makes the fast scan stop - confirm.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only 0x9 is kept, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: & (0x26) is zero */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: < (0x3C) is zero */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ] (0x5D) is zero */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80-0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4394
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * Parse a CharData section.
 * If we are within a CDATA section ']]>' marks an end of section.
 *
 * When @cdata is zero an accelerated path scans the input buffer in place
 * and hands runs of plain ASCII text directly to the SAX callbacks without
 * copying them. As soon as a byte outside TAB / 0x20..0x7F is met (or when
 * @cdata is non-zero) the work is handed over to xmlParseCharDataComplex().
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * line/col remember the position matching ctxt->input->cur; they are
     * refreshed after each SAX delivery and written back before falling
     * back to the slow path, which restarts from ctxt->input->cur.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * The run consisted only of spaces/line feeds and is followed
             * by markup: deliver it and stop.
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    /*
                     * Only ask areBlanks() when the two callbacks differ;
                     * otherwise everything goes to characters().
                     */
                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* advance over every byte the table marks as plain text */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside a CDATA section */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* a lone ']' is ordinary text */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver the bytes between ctxt->input->cur and in */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: cur is moved onto the LF, so the CR is
                     * left out of the next delivered chunk.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* lone CR: step back and let the code below handle it */
                in--;
            }
            /* markup or a reference follows: the caller takes over */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* re-read the cursor after SHRINK/GROW */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /*
     * Slow path: put back the line/column matching ctxt->input->cur and
     * let the generic routine continue from there.
     */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4552
Daniel Veillard01c13b52002-12-10 15:19:08 +00004553/**
4554 * xmlParseCharDataComplex:
4555 * @ctxt: an XML parser context
4556 * @cdata: int indicating whether we are within a CDATA section
4557 *
4558 * parse a CharData section.this is the fallback function
4559 * of xmlParseCharData() when the parsing requires handling
4560 * of non-ASCII characters.
4561 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004562static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004563xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004564 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4565 int nbchar = 0;
4566 int cur, l;
4567 int count = 0;
4568
4569 SHRINK;
4570 GROW;
4571 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004572 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004573 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004574 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004575 if ((cur == ']') && (NXT(1) == ']') &&
4576 (NXT(2) == '>')) {
4577 if (cdata) break;
4578 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004579 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004580 }
4581 }
4582 COPY_BUF(l,buf,nbchar,cur);
4583 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004584 buf[nbchar] = 0;
4585
Owen Taylor3473f882001-02-23 17:55:21 +00004586 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004587 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004588 */
4589 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004590 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004591 if (ctxt->sax->ignorableWhitespace != NULL)
4592 ctxt->sax->ignorableWhitespace(ctxt->userData,
4593 buf, nbchar);
4594 } else {
4595 if (ctxt->sax->characters != NULL)
4596 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004597 if ((ctxt->sax->characters !=
4598 ctxt->sax->ignorableWhitespace) &&
4599 (*ctxt->space == -1))
4600 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004601 }
4602 }
4603 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004604 /* something really bad happened in the SAX callback */
4605 if (ctxt->instate != XML_PARSER_CONTENT)
4606 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004607 }
4608 count++;
4609 if (count > 50) {
4610 GROW;
4611 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004612 if (ctxt->instate == XML_PARSER_EOF)
4613 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004614 }
4615 NEXTL(l);
4616 cur = CUR_CHAR(l);
4617 }
4618 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004619 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004620 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004621 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004622 */
4623 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004624 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004625 if (ctxt->sax->ignorableWhitespace != NULL)
4626 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4627 } else {
4628 if (ctxt->sax->characters != NULL)
4629 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004630 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4631 (*ctxt->space == -1))
4632 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004633 }
4634 }
4635 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004636 if ((cur != 0) && (!IS_CHAR(cur))) {
4637 /* Generate the error and skip the offending character */
4638 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4639 "PCDATA invalid Char value %d\n",
4640 cur);
4641 NEXTL(l);
4642 }
Owen Taylor3473f882001-02-23 17:55:21 +00004643}
4644
4645/**
4646 * xmlParseExternalID:
4647 * @ctxt: an XML parser context
4648 * @publicID: a xmlChar** receiving PubidLiteral
4649 * @strict: indicate whether we should restrict parsing to only
4650 * production [75], see NOTE below
4651 *
4652 * Parse an External ID or a Public ID
4653 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004654 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004655 * 'PUBLIC' S PubidLiteral S SystemLiteral
4656 *
4657 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4658 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4659 *
4660 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4661 *
4662 * Returns the function returns SystemLiteral and in the second
4663 * case publicID receives PubidLiteral, is strict is off
4664 * it is possible to return NULL and have publicID set.
4665 */
4666
4667xmlChar *
4668xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4669 xmlChar *URI = NULL;
4670
4671 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004672
4673 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004674 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004675 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004676 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004677 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4678 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004679 }
4680 SKIP_BLANKS;
4681 URI = xmlParseSystemLiteral(ctxt);
4682 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004683 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004684 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004685 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004686 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004687 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004689 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004690 }
4691 SKIP_BLANKS;
4692 *publicID = xmlParsePubidLiteral(ctxt);
4693 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004694 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004695 }
4696 if (strict) {
4697 /*
4698 * We don't handle [83] so "S SystemLiteral" is required.
4699 */
William M. Brack76e95df2003-10-18 16:20:14 +00004700 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004701 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004702 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004703 }
4704 } else {
4705 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004706 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004707 * "S SystemLiteral" is not detected. From a purely parsing
4708 * point of view that's a nice mess.
4709 */
4710 const xmlChar *ptr;
4711 GROW;
4712
4713 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004714 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004715
William M. Brack76e95df2003-10-18 16:20:14 +00004716 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004717 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4718 }
4719 SKIP_BLANKS;
4720 URI = xmlParseSystemLiteral(ctxt);
4721 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004722 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
4724 }
4725 return(URI);
4726}
4727
4728/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004729 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004730 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004731 * @buf: the already parsed part of the buffer
4732 * @len: number of bytes filles in the buffer
4733 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004734 *
4735 * Skip an XML (SGML) comment <!-- .... -->
4736 * The spec says that "For compatibility, the string "--" (double-hyphen)
4737 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004738 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004739 *
4740 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4741 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004742static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004743xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4744 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004745 int q, ql;
4746 int r, rl;
4747 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004748 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004749 int inputid;
4750
4751 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004752
Owen Taylor3473f882001-02-23 17:55:21 +00004753 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004754 len = 0;
4755 size = XML_PARSER_BUFFER_SIZE;
4756 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4757 if (buf == NULL) {
4758 xmlErrMemory(ctxt, NULL);
4759 return;
4760 }
Owen Taylor3473f882001-02-23 17:55:21 +00004761 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004762 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004763 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004764 if (q == 0)
4765 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004766 if (!IS_CHAR(q)) {
4767 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4768 "xmlParseComment: invalid xmlChar value %d\n",
4769 q);
4770 xmlFree (buf);
4771 return;
4772 }
Owen Taylor3473f882001-02-23 17:55:21 +00004773 NEXTL(ql);
4774 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004775 if (r == 0)
4776 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004777 if (!IS_CHAR(r)) {
4778 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4779 "xmlParseComment: invalid xmlChar value %d\n",
4780 q);
4781 xmlFree (buf);
4782 return;
4783 }
Owen Taylor3473f882001-02-23 17:55:21 +00004784 NEXTL(rl);
4785 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004786 if (cur == 0)
4787 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004788 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004789 ((cur != '>') ||
4790 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004791 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004792 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004793 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004794 if ((len > XML_MAX_TEXT_LENGTH) &&
4795 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4796 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797 "Comment too big found", NULL);
4798 xmlFree (buf);
4799 return;
4800 }
Owen Taylor3473f882001-02-23 17:55:21 +00004801 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004802 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004803 size_t new_size;
4804
4805 new_size = size * 2;
4806 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004807 if (new_buf == NULL) {
4808 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004810 return;
4811 }
William M. Bracka3215c72004-07-31 16:24:01 +00004812 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004813 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004814 }
4815 COPY_BUF(ql,buf,len,q);
4816 q = r;
4817 ql = rl;
4818 r = cur;
4819 rl = l;
4820
4821 count++;
4822 if (count > 50) {
4823 GROW;
4824 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004825 if (ctxt->instate == XML_PARSER_EOF) {
4826 xmlFree(buf);
4827 return;
4828 }
Owen Taylor3473f882001-02-23 17:55:21 +00004829 }
4830 NEXTL(l);
4831 cur = CUR_CHAR(l);
4832 if (cur == 0) {
4833 SHRINK;
4834 GROW;
4835 cur = CUR_CHAR(l);
4836 }
4837 }
4838 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004839 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004840 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004841 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004842 } else if (!IS_CHAR(cur)) {
4843 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4844 "xmlParseComment: invalid xmlChar value %d\n",
4845 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004846 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004847 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004848 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4849 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004850 }
4851 NEXT;
4852 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4853 (!ctxt->disableSAX))
4854 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004855 }
Daniel Veillardda629342007-08-01 07:49:06 +00004856 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004857 return;
4858not_terminated:
4859 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4860 "Comment not terminated\n", NULL);
4861 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004862 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004863}
Daniel Veillardda629342007-08-01 07:49:06 +00004864
/**
 * xmlParseComment:
 * @ctxt:  an XML parser context
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 *
 * Returns immediately, without consuming anything, when the input does
 * not start with "<!--". An accelerated loop handles comments made of
 * TAB, LF and bytes in 0x20..0x7F; any other byte hands the work (and
 * the buffer accumulated so far) over to xmlParseCommentComplex().
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
void
xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;    /* accumulated comment text, zero terminated */
    size_t size = XML_PARSER_BUFFER_SIZE;
    size_t len = 0;
    xmlParserInputState state;
    const xmlChar *in;
    size_t nbchar = 0;
    int ccol;
    int inputid;

    /*
     * Check that there is a comment right here.
     */
    if ((RAW != '<') || (NXT(1) != '!') ||
        (NXT(2) != '-') || (NXT(3) != '-')) return;
    /* remember the parser state so it can be restored on exit */
    state = ctxt->instate;
    ctxt->instate = XML_PARSER_COMMENT;
    inputid = ctxt->input->id;
    SKIP(4);
    SHRINK;
    GROW;

    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
        }
get_more:
        /* advance over TAB and 0x20..0x7F, stopping on '-' */
        ccol = ctxt->input->col;
        while (((*in > '-') && (*in <= 0x7F)) ||
               ((*in >= 0x20) && (*in < '-')) ||
               (*in == 0x09)) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        nbchar = in - ctxt->input->cur;
        /*
         * save current set of data
         * (only when a comment callback exists to receive it)
         */
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->sax->comment != NULL)) {
                if (buf == NULL) {
                    /*
                     * First chunk: if "--" follows, size the buffer for
                     * this chunk plus the terminator only.
                     */
                    if ((*in == '-') && (in[1] == '-'))
                        size = nbchar + 1;
                    else
                        size = XML_PARSER_BUFFER_SIZE + nbchar;
                    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
                    if (buf == NULL) {
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    len = 0;
                } else if (len + nbchar + 1 >= size) {
                    xmlChar *new_buf;
                    size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
                    new_buf = (xmlChar *) xmlRealloc(buf,
                                                     size * sizeof(xmlChar));
                    if (new_buf == NULL) {
                        xmlFree (buf);
                        xmlErrMemory(ctxt, NULL);
                        ctxt->instate = state;
                        return;
                    }
                    buf = new_buf;
                }
                memcpy(&buf[len], ctxt->input->cur, nbchar);
                len += nbchar;
                buf[len] = 0;
            }
        }
        /*
         * NOTE(review): unlike the allocation failure paths above, this
         * return does not restore ctxt->instate - confirm it is intended.
         */
        if ((len > XML_MAX_TEXT_LENGTH) &&
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
                         "Comment too big found", NULL);
            xmlFree (buf);
            return;
        }
        ctxt->input->cur = in;
        if (*in == 0xA) {
            in++;
            ctxt->input->line++; ctxt->input->col = 1;
        }
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /*
                 * CR LF pair: cur is moved onto the LF, so the CR is
                 * left out of the next copied chunk.
                 */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            /* lone CR: step back */
            in--;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF) {
            xmlFree(buf);
            return;
        }
        /* re-read the cursor after SHRINK/GROW */
        in = ctxt->input->cur;
        if (*in == '-') {
            if (in[1] == '-') {
                if (in[2] == '>') {
                    /* "-->" : end of the comment */
                    if (ctxt->input->id != inputid) {
                        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
                        "comment doesn't start and stop in the same entity\n");
                    }
                    SKIP(3);
                    if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
                        (!ctxt->disableSAX)) {
                        if (buf != NULL)
                            ctxt->sax->comment(ctxt->userData, buf);
                        else
                            ctxt->sax->comment(ctxt->userData, BAD_CAST "");
                    }
                    if (buf != NULL)
                        xmlFree(buf);
                    ctxt->instate = state;
                    return;
                }
                /* "--" not followed by '>' : report it and keep going */
                if (buf != NULL) {
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment: "
                                      "<!--%.50s\n",
                                      buf);
                } else
                    xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
                                      "Double hyphen within comment\n", NULL);
                in++;
                ctxt->input->col++;
            }
            /*
             * Step over the hyphen; ctxt->input->cur is not moved, so the
             * hyphen(s) become part of the next chunk copied to buf.
             */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
    /* slow path: continues from ctxt->input->cur with the buffer so far */
    xmlParseCommentComplex(ctxt, buf, len, size);
    ctxt->instate = state;
    return;
}
5032
Owen Taylor3473f882001-02-23 17:55:21 +00005033
5034/**
5035 * xmlParsePITarget:
5036 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005037 *
Owen Taylor3473f882001-02-23 17:55:21 +00005038 * parse the name of a PI
5039 *
5040 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5041 *
5042 * Returns the PITarget name or NULL
5043 */
5044
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005045const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005046xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005047 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005048
5049 name = xmlParseName(ctxt);
5050 if ((name != NULL) &&
5051 ((name[0] == 'x') || (name[0] == 'X')) &&
5052 ((name[1] == 'm') || (name[1] == 'M')) &&
5053 ((name[2] == 'l') || (name[2] == 'L'))) {
5054 int i;
5055 if ((name[0] == 'x') && (name[1] == 'm') &&
5056 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005057 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005058 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005059 return(name);
5060 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005061 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005062 return(name);
5063 }
5064 for (i = 0;;i++) {
5065 if (xmlW3CPIs[i] == NULL) break;
5066 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5067 return(name);
5068 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005069 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5070 "xmlParsePITarget: invalid name prefix 'xml'\n",
5071 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005072 }
Daniel Veillard37334572008-07-31 08:20:02 +00005073 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005074 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005075 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5076 }
Owen Taylor3473f882001-02-23 17:55:21 +00005077 return(name);
5078}
5079
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005080#ifdef LIBXML_CATALOG_ENABLED
5081/**
5082 * xmlParseCatalogPI:
5083 * @ctxt: an XML parser context
5084 * @catalog: the PI value string
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005085 *
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005086 * parse an XML Catalog Processing Instruction.
5087 *
5088 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5089 *
5090 * Occurs only if allowed by the user and if happening in the Misc
5091 * part of the document before any doctype informations
5092 * This will add the given catalog to the parsing context in order
5093 * to be used if there is a resolution need further down in the document
5094 */
5095
5096static void
5097xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5098 xmlChar *URL = NULL;
5099 const xmlChar *tmp, *base;
5100 xmlChar marker;
5101
5102 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00005103 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005104 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5105 goto error;
5106 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00005107 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005108 if (*tmp != '=') {
5109 return;
5110 }
5111 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005112 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005113 marker = *tmp;
5114 if ((marker != '\'') && (marker != '"'))
5115 goto error;
5116 tmp++;
5117 base = tmp;
5118 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5119 if (*tmp == 0)
5120 goto error;
5121 URL = xmlStrndup(base, tmp - base);
5122 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005123 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005124 if (*tmp != 0)
5125 goto error;
5126
5127 if (URL != NULL) {
5128 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5129 xmlFree(URL);
5130 }
5131 return;
5132
5133error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00005134 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5135 "Catalog PI syntax error: %s\n",
5136 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005137 if (URL != NULL)
5138 xmlFree(URL);
5139}
5140#endif
5141
Owen Taylor3473f882001-02-23 17:55:21 +00005142/**
5143 * xmlParsePI:
5144 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005145 *
Owen Taylor3473f882001-02-23 17:55:21 +00005146 * parse an XML Processing Instruction.
5147 *
5148 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5149 *
5150 * The processing is transfered to SAX once parsed.
5151 */
5152
5153void
5154xmlParsePI(xmlParserCtxtPtr ctxt) {
5155 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005156 size_t len = 0;
5157 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005158 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005159 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005160 xmlParserInputState state;
5161 int count = 0;
5162
5163 if ((RAW == '<') && (NXT(1) == '?')) {
5164 xmlParserInputPtr input = ctxt->input;
5165 state = ctxt->instate;
5166 ctxt->instate = XML_PARSER_PI;
5167 /*
5168 * this is a Processing Instruction.
5169 */
5170 SKIP(2);
5171 SHRINK;
5172
5173 /*
5174 * Parse the target name and check for special support like
5175 * namespace.
5176 */
5177 target = xmlParsePITarget(ctxt);
5178 if (target != NULL) {
5179 if ((RAW == '?') && (NXT(1) == '>')) {
5180 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005181 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5182 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005183 }
5184 SKIP(2);
5185
5186 /*
5187 * SAX: PI detected.
5188 */
5189 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5190 (ctxt->sax->processingInstruction != NULL))
5191 ctxt->sax->processingInstruction(ctxt->userData,
5192 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005193 if (ctxt->instate != XML_PARSER_EOF)
5194 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005195 return;
5196 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005197 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005198 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005199 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005200 ctxt->instate = state;
5201 return;
5202 }
5203 cur = CUR;
5204 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005205 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5206 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005207 }
5208 SKIP_BLANKS;
5209 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005210 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005211 ((cur != '?') || (NXT(1) != '>'))) {
5212 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005213 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005214 size_t new_size = size * 2;
5215 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005216 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005217 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005218 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005219 ctxt->instate = state;
5220 return;
5221 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005222 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005223 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005224 }
5225 count++;
5226 if (count > 50) {
5227 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005228 if (ctxt->instate == XML_PARSER_EOF) {
5229 xmlFree(buf);
5230 return;
5231 }
Owen Taylor3473f882001-02-23 17:55:21 +00005232 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005233 if ((len > XML_MAX_TEXT_LENGTH) &&
5234 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5235 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5236 "PI %s too big found", target);
5237 xmlFree(buf);
5238 ctxt->instate = state;
5239 return;
5240 }
Owen Taylor3473f882001-02-23 17:55:21 +00005241 }
5242 COPY_BUF(l,buf,len,cur);
5243 NEXTL(l);
5244 cur = CUR_CHAR(l);
5245 if (cur == 0) {
5246 SHRINK;
5247 GROW;
5248 cur = CUR_CHAR(l);
5249 }
5250 }
Daniel Veillard51304812012-07-19 20:34:26 +08005251 if ((len > XML_MAX_TEXT_LENGTH) &&
5252 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5253 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5254 "PI %s too big found", target);
5255 xmlFree(buf);
5256 ctxt->instate = state;
5257 return;
5258 }
Owen Taylor3473f882001-02-23 17:55:21 +00005259 buf[len] = 0;
5260 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005261 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5262 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005263 } else {
5264 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005265 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5266 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005267 }
5268 SKIP(2);
5269
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005270#ifdef LIBXML_CATALOG_ENABLED
5271 if (((state == XML_PARSER_MISC) ||
5272 (state == XML_PARSER_START)) &&
5273 (xmlStrEqual(target, XML_CATALOG_PI))) {
5274 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5275 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5276 (allow == XML_CATA_ALLOW_ALL))
5277 xmlParseCatalogPI(ctxt, buf);
5278 }
5279#endif
5280
5281
Owen Taylor3473f882001-02-23 17:55:21 +00005282 /*
5283 * SAX: PI detected.
5284 */
5285 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5286 (ctxt->sax->processingInstruction != NULL))
5287 ctxt->sax->processingInstruction(ctxt->userData,
5288 target, buf);
5289 }
5290 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005292 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005293 }
Chris Evans77404b82011-12-14 16:18:25 +08005294 if (ctxt->instate != XML_PARSER_EOF)
5295 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005296 }
5297}
5298
5299/**
5300 * xmlParseNotationDecl:
5301 * @ctxt: an XML parser context
5302 *
5303 * parse a notation declaration
5304 *
5305 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5306 *
5307 * Hence there is actually 3 choices:
5308 * 'PUBLIC' S PubidLiteral
5309 * 'PUBLIC' S PubidLiteral S SystemLiteral
5310 * and 'SYSTEM' S SystemLiteral
5311 *
5312 * See the NOTE on xmlParseExternalID().
5313 */
5314
5315void
5316xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005317 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005318 xmlChar *Pubid;
5319 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005320
Daniel Veillarda07050d2003-10-19 14:46:32 +00005321 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005322 xmlParserInputPtr input = ctxt->input;
5323 SHRINK;
5324 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005325 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005326 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5327 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005328 return;
5329 }
5330 SKIP_BLANKS;
5331
Daniel Veillard76d66f42001-05-16 21:05:17 +00005332 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005333 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005334 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005335 return;
5336 }
William M. Brack76e95df2003-10-18 16:20:14 +00005337 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005338 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005339 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005340 return;
5341 }
Daniel Veillard37334572008-07-31 08:20:02 +00005342 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005343 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005344 "colon are forbidden from notation names '%s'\n",
5345 name, NULL, NULL);
5346 }
Owen Taylor3473f882001-02-23 17:55:21 +00005347 SKIP_BLANKS;
5348
5349 /*
5350 * Parse the IDs.
5351 */
5352 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5353 SKIP_BLANKS;
5354
5355 if (RAW == '>') {
5356 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005357 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5358 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360 NEXT;
5361 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5362 (ctxt->sax->notationDecl != NULL))
5363 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5364 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005365 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005366 }
Owen Taylor3473f882001-02-23 17:55:21 +00005367 if (Systemid != NULL) xmlFree(Systemid);
5368 if (Pubid != NULL) xmlFree(Pubid);
5369 }
5370}
5371
5372/**
5373 * xmlParseEntityDecl:
5374 * @ctxt: an XML parser context
5375 *
5376 * parse <!ENTITY declarations
5377 *
5378 * [70] EntityDecl ::= GEDecl | PEDecl
5379 *
5380 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5381 *
5382 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5383 *
5384 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5385 *
5386 * [74] PEDef ::= EntityValue | ExternalID
5387 *
5388 * [76] NDataDecl ::= S 'NDATA' S Name
5389 *
5390 * [ VC: Notation Declared ]
5391 * The Name must match the declared name of a notation.
5392 */
5393
5394void
5395xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005396 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005397 xmlChar *value = NULL;
5398 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005399 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 int isParameter = 0;
5401 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005402 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005403
Daniel Veillard4c778d82005-01-23 17:37:44 +00005404 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005405 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005406 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005407 SHRINK;
5408 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005409 skipped = SKIP_BLANKS;
5410 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005411 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5412 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005413 }
Owen Taylor3473f882001-02-23 17:55:21 +00005414
5415 if (RAW == '%') {
5416 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005417 skipped = SKIP_BLANKS;
5418 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005419 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5420 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005421 }
Owen Taylor3473f882001-02-23 17:55:21 +00005422 isParameter = 1;
5423 }
5424
Daniel Veillard76d66f42001-05-16 21:05:17 +00005425 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005426 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005427 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5428 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005429 return;
5430 }
Daniel Veillard37334572008-07-31 08:20:02 +00005431 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005432 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005433 "colon are forbidden from entities names '%s'\n",
5434 name, NULL, NULL);
5435 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005436 skipped = SKIP_BLANKS;
5437 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5439 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005440 }
Owen Taylor3473f882001-02-23 17:55:21 +00005441
Daniel Veillardf5582f12002-06-11 10:08:16 +00005442 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005443 /*
5444 * handle the various case of definitions...
5445 */
5446 if (isParameter) {
5447 if ((RAW == '"') || (RAW == '\'')) {
5448 value = xmlParseEntityValue(ctxt, &orig);
5449 if (value) {
5450 if ((ctxt->sax != NULL) &&
5451 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5452 ctxt->sax->entityDecl(ctxt->userData, name,
5453 XML_INTERNAL_PARAMETER_ENTITY,
5454 NULL, NULL, value);
5455 }
5456 } else {
5457 URI = xmlParseExternalID(ctxt, &literal, 1);
5458 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005459 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005460 }
5461 if (URI) {
5462 xmlURIPtr uri;
5463
5464 uri = xmlParseURI((const char *) URI);
5465 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005466 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5467 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005468 /*
5469 * This really ought to be a well formedness error
5470 * but the XML Core WG decided otherwise c.f. issue
5471 * E26 of the XML erratas.
5472 */
Owen Taylor3473f882001-02-23 17:55:21 +00005473 } else {
5474 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005475 /*
5476 * Okay this is foolish to block those but not
5477 * invalid URIs.
5478 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005479 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005480 } else {
5481 if ((ctxt->sax != NULL) &&
5482 (!ctxt->disableSAX) &&
5483 (ctxt->sax->entityDecl != NULL))
5484 ctxt->sax->entityDecl(ctxt->userData, name,
5485 XML_EXTERNAL_PARAMETER_ENTITY,
5486 literal, URI, NULL);
5487 }
5488 xmlFreeURI(uri);
5489 }
5490 }
5491 }
5492 } else {
5493 if ((RAW == '"') || (RAW == '\'')) {
5494 value = xmlParseEntityValue(ctxt, &orig);
5495 if ((ctxt->sax != NULL) &&
5496 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5497 ctxt->sax->entityDecl(ctxt->userData, name,
5498 XML_INTERNAL_GENERAL_ENTITY,
5499 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005500 /*
5501 * For expat compatibility in SAX mode.
5502 */
5503 if ((ctxt->myDoc == NULL) ||
5504 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5505 if (ctxt->myDoc == NULL) {
5506 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005507 if (ctxt->myDoc == NULL) {
5508 xmlErrMemory(ctxt, "New Doc failed");
5509 return;
5510 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005511 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005512 }
5513 if (ctxt->myDoc->intSubset == NULL)
5514 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5515 BAD_CAST "fake", NULL, NULL);
5516
Daniel Veillard1af9a412003-08-20 22:54:39 +00005517 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5518 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005519 }
Owen Taylor3473f882001-02-23 17:55:21 +00005520 } else {
5521 URI = xmlParseExternalID(ctxt, &literal, 1);
5522 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005523 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005524 }
5525 if (URI) {
5526 xmlURIPtr uri;
5527
5528 uri = xmlParseURI((const char *)URI);
5529 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005530 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5531 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005532 /*
5533 * This really ought to be a well formedness error
5534 * but the XML Core WG decided otherwise c.f. issue
5535 * E26 of the XML erratas.
5536 */
Owen Taylor3473f882001-02-23 17:55:21 +00005537 } else {
5538 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005539 /*
5540 * Okay this is foolish to block those but not
5541 * invalid URIs.
5542 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005543 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005544 }
5545 xmlFreeURI(uri);
5546 }
5547 }
William M. Brack76e95df2003-10-18 16:20:14 +00005548 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005549 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5550 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005551 }
5552 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005553 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005554 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005555 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005556 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5557 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005558 }
5559 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005560 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005561 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5562 (ctxt->sax->unparsedEntityDecl != NULL))
5563 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5564 literal, URI, ndata);
5565 } else {
5566 if ((ctxt->sax != NULL) &&
5567 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5568 ctxt->sax->entityDecl(ctxt->userData, name,
5569 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5570 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005571 /*
5572 * For expat compatibility in SAX mode.
5573 * assuming the entity repalcement was asked for
5574 */
5575 if ((ctxt->replaceEntities != 0) &&
5576 ((ctxt->myDoc == NULL) ||
5577 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5578 if (ctxt->myDoc == NULL) {
5579 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005580 if (ctxt->myDoc == NULL) {
5581 xmlErrMemory(ctxt, "New Doc failed");
5582 return;
5583 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005584 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005585 }
5586
5587 if (ctxt->myDoc->intSubset == NULL)
5588 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5589 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005590 xmlSAX2EntityDecl(ctxt, name,
5591 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5592 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005593 }
Owen Taylor3473f882001-02-23 17:55:21 +00005594 }
5595 }
5596 }
5597 SKIP_BLANKS;
5598 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005599 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005600 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005601 } else {
5602 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005603 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5604 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005605 }
5606 NEXT;
5607 }
5608 if (orig != NULL) {
5609 /*
5610 * Ugly mechanism to save the raw entity value.
5611 */
5612 xmlEntityPtr cur = NULL;
5613
5614 if (isParameter) {
5615 if ((ctxt->sax != NULL) &&
5616 (ctxt->sax->getParameterEntity != NULL))
5617 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5618 } else {
5619 if ((ctxt->sax != NULL) &&
5620 (ctxt->sax->getEntity != NULL))
5621 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005622 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005623 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005624 }
Owen Taylor3473f882001-02-23 17:55:21 +00005625 }
5626 if (cur != NULL) {
5627 if (cur->orig != NULL)
5628 xmlFree(orig);
5629 else
5630 cur->orig = orig;
5631 } else
5632 xmlFree(orig);
5633 }
Owen Taylor3473f882001-02-23 17:55:21 +00005634 if (value != NULL) xmlFree(value);
5635 if (URI != NULL) xmlFree(URI);
5636 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005637 }
5638}
5639
5640/**
5641 * xmlParseDefaultDecl:
5642 * @ctxt: an XML parser context
5643 * @value: Receive a possible fixed default value for the attribute
5644 *
5645 * Parse an attribute default declaration
5646 *
5647 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5648 *
5649 * [ VC: Required Attribute ]
5650 * if the default declaration is the keyword #REQUIRED, then the
5651 * attribute must be specified for all elements of the type in the
5652 * attribute-list declaration.
5653 *
5654 * [ VC: Attribute Default Legal ]
5655 * The declared default value must meet the lexical constraints of
5656 * the declared attribute type c.f. xmlValidateAttributeDecl()
5657 *
5658 * [ VC: Fixed Attribute Default ]
5659 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005660 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005661 *
5662 * [ WFC: No < in Attribute Values ]
5663 * handled in xmlParseAttValue()
5664 *
5665 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005666 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005667 */
5668
5669int
5670xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5671 int val;
5672 xmlChar *ret;
5673
5674 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005675 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005676 SKIP(9);
5677 return(XML_ATTRIBUTE_REQUIRED);
5678 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005679 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005680 SKIP(8);
5681 return(XML_ATTRIBUTE_IMPLIED);
5682 }
5683 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005684 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005685 SKIP(6);
5686 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005687 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005688 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5689 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005690 }
5691 SKIP_BLANKS;
5692 }
5693 ret = xmlParseAttValue(ctxt);
5694 ctxt->instate = XML_PARSER_DTD;
5695 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005696 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005697 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005698 } else
5699 *value = ret;
5700 return(val);
5701}
5702
5703/**
5704 * xmlParseNotationType:
5705 * @ctxt: an XML parser context
5706 *
5707 * parse an Notation attribute type.
5708 *
5709 * Note: the leading 'NOTATION' S part has already being parsed...
5710 *
5711 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5712 *
5713 * [ VC: Notation Attributes ]
5714 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005715 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005716 *
5717 * Returns: the notation attribute tree built while parsing
5718 */
5719
5720xmlEnumerationPtr
5721xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005722 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005723 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005724
5725 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005726 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005727 return(NULL);
5728 }
5729 SHRINK;
5730 do {
5731 NEXT;
5732 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005733 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005734 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005735 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5736 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005737 xmlFreeEnumeration(ret);
5738 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005739 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005740 tmp = ret;
5741 while (tmp != NULL) {
5742 if (xmlStrEqual(name, tmp->name)) {
5743 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5744 "standalone: attribute notation value token %s duplicated\n",
5745 name, NULL);
5746 if (!xmlDictOwns(ctxt->dict, name))
5747 xmlFree((xmlChar *) name);
5748 break;
5749 }
5750 tmp = tmp->next;
5751 }
5752 if (tmp == NULL) {
5753 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005754 if (cur == NULL) {
5755 xmlFreeEnumeration(ret);
5756 return(NULL);
5757 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005758 if (last == NULL) ret = last = cur;
5759 else {
5760 last->next = cur;
5761 last = cur;
5762 }
Owen Taylor3473f882001-02-23 17:55:21 +00005763 }
5764 SKIP_BLANKS;
5765 } while (RAW == '|');
5766 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005767 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005768 xmlFreeEnumeration(ret);
5769 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005770 }
5771 NEXT;
5772 return(ret);
5773}
5774
5775/**
5776 * xmlParseEnumerationType:
5777 * @ctxt: an XML parser context
5778 *
5779 * parse an Enumeration attribute type.
5780 *
5781 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5782 *
5783 * [ VC: Enumeration ]
5784 * Values of this type must match one of the Nmtoken tokens in
5785 * the declaration
5786 *
5787 * Returns: the enumeration attribute tree built while parsing
5788 */
5789
5790xmlEnumerationPtr
5791xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5792 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005793 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005794
5795 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005796 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005797 return(NULL);
5798 }
5799 SHRINK;
5800 do {
5801 NEXT;
5802 SKIP_BLANKS;
5803 name = xmlParseNmtoken(ctxt);
5804 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005805 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005806 return(ret);
5807 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005808 tmp = ret;
5809 while (tmp != NULL) {
5810 if (xmlStrEqual(name, tmp->name)) {
5811 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5812 "standalone: attribute enumeration value token %s duplicated\n",
5813 name, NULL);
5814 if (!xmlDictOwns(ctxt->dict, name))
5815 xmlFree(name);
5816 break;
5817 }
5818 tmp = tmp->next;
5819 }
5820 if (tmp == NULL) {
5821 cur = xmlCreateEnumeration(name);
5822 if (!xmlDictOwns(ctxt->dict, name))
5823 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005824 if (cur == NULL) {
5825 xmlFreeEnumeration(ret);
5826 return(NULL);
5827 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005828 if (last == NULL) ret = last = cur;
5829 else {
5830 last->next = cur;
5831 last = cur;
5832 }
Owen Taylor3473f882001-02-23 17:55:21 +00005833 }
5834 SKIP_BLANKS;
5835 } while (RAW == '|');
5836 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005837 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005838 return(ret);
5839 }
5840 NEXT;
5841 return(ret);
5842}
5843
5844/**
5845 * xmlParseEnumeratedType:
5846 * @ctxt: an XML parser context
5847 * @tree: the enumeration tree built while parsing
5848 *
5849 * parse an Enumerated attribute type.
5850 *
5851 * [57] EnumeratedType ::= NotationType | Enumeration
5852 *
5853 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5854 *
5855 *
5856 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5857 */
5858
5859int
5860xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005861 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005862 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005863 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005864 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5865 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005866 return(0);
5867 }
5868 SKIP_BLANKS;
5869 *tree = xmlParseNotationType(ctxt);
5870 if (*tree == NULL) return(0);
5871 return(XML_ATTRIBUTE_NOTATION);
5872 }
5873 *tree = xmlParseEnumerationType(ctxt);
5874 if (*tree == NULL) return(0);
5875 return(XML_ATTRIBUTE_ENUMERATION);
5876}
5877
5878/**
5879 * xmlParseAttributeType:
5880 * @ctxt: an XML parser context
5881 * @tree: the enumeration tree built while parsing
5882 *
5883 * parse the Attribute list def for an element
5884 *
5885 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5886 *
5887 * [55] StringType ::= 'CDATA'
5888 *
5889 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5890 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5891 *
5892 * Validity constraints for attribute values syntax are checked in
5893 * xmlValidateAttributeValue()
5894 *
5895 * [ VC: ID ]
5896 * Values of type ID must match the Name production. A name must not
5897 * appear more than once in an XML document as a value of this type;
5898 * i.e., ID values must uniquely identify the elements which bear them.
5899 *
5900 * [ VC: One ID per Element Type ]
5901 * No element type may have more than one ID attribute specified.
5902 *
5903 * [ VC: ID Attribute Default ]
5904 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5905 *
5906 * [ VC: IDREF ]
5907 * Values of type IDREF must match the Name production, and values
5908 * of type IDREFS must match Names; each IDREF Name must match the value
5909 * of an ID attribute on some element in the XML document; i.e. IDREF
5910 * values must match the value of some ID attribute.
5911 *
5912 * [ VC: Entity Name ]
5913 * Values of type ENTITY must match the Name production, values
5914 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005915 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005916 *
5917 * [ VC: Name Token ]
5918 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005919 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005920 *
5921 * Returns the attribute type
5922 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005923int
Owen Taylor3473f882001-02-23 17:55:21 +00005924xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5925 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005926 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005927 SKIP(5);
5928 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005929 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005930 SKIP(6);
5931 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005932 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005933 SKIP(5);
5934 return(XML_ATTRIBUTE_IDREF);
5935 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5936 SKIP(2);
5937 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005938 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005939 SKIP(6);
5940 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005941 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005942 SKIP(8);
5943 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005944 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005945 SKIP(8);
5946 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005947 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005948 SKIP(7);
5949 return(XML_ATTRIBUTE_NMTOKEN);
5950 }
5951 return(xmlParseEnumeratedType(ctxt, tree));
5952}
5953
5954/**
5955 * xmlParseAttributeListDecl:
5956 * @ctxt: an XML parser context
5957 *
5958 * : parse the Attribute list def for an element
5959 *
5960 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5961 *
5962 * [53] AttDef ::= S Name S AttType S DefaultDecl
5963 *
5964 */
5965void
5966xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005967 const xmlChar *elemName;
5968 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005969 xmlEnumerationPtr tree;
5970
Daniel Veillarda07050d2003-10-19 14:46:32 +00005971 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005972 xmlParserInputPtr input = ctxt->input;
5973
5974 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005975 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005976 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005977 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005978 }
5979 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005980 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005981 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005982 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5983 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005984 return;
5985 }
5986 SKIP_BLANKS;
5987 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005988 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005989 const xmlChar *check = CUR_PTR;
5990 int type;
5991 int def;
5992 xmlChar *defaultValue = NULL;
5993
5994 GROW;
5995 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005996 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005997 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005998 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5999 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006000 break;
6001 }
6002 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006003 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006004 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006005 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006006 break;
6007 }
6008 SKIP_BLANKS;
6009
6010 type = xmlParseAttributeType(ctxt, &tree);
6011 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006012 break;
6013 }
6014
6015 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006016 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006017 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6018 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006019 if (tree != NULL)
6020 xmlFreeEnumeration(tree);
6021 break;
6022 }
6023 SKIP_BLANKS;
6024
6025 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6026 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006027 if (defaultValue != NULL)
6028 xmlFree(defaultValue);
6029 if (tree != NULL)
6030 xmlFreeEnumeration(tree);
6031 break;
6032 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006033 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6034 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006035
6036 GROW;
6037 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006038 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006039 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006040 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006041 if (defaultValue != NULL)
6042 xmlFree(defaultValue);
6043 if (tree != NULL)
6044 xmlFreeEnumeration(tree);
6045 break;
6046 }
6047 SKIP_BLANKS;
6048 }
6049 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006050 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6051 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006052 if (defaultValue != NULL)
6053 xmlFree(defaultValue);
6054 if (tree != NULL)
6055 xmlFreeEnumeration(tree);
6056 break;
6057 }
6058 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6059 (ctxt->sax->attributeDecl != NULL))
6060 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6061 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006062 else if (tree != NULL)
6063 xmlFreeEnumeration(tree);
6064
6065 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006066 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006067 (def != XML_ATTRIBUTE_REQUIRED)) {
6068 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6069 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006070 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006071 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6072 }
Owen Taylor3473f882001-02-23 17:55:21 +00006073 if (defaultValue != NULL)
6074 xmlFree(defaultValue);
6075 GROW;
6076 }
6077 if (RAW == '>') {
6078 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006079 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6080 "Attribute list declaration doesn't start and stop in the same entity\n",
6081 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006082 }
6083 NEXT;
6084 }
Owen Taylor3473f882001-02-23 17:55:21 +00006085 }
6086}
6087
6088/**
6089 * xmlParseElementMixedContentDecl:
6090 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006091 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006092 *
6093 * parse the declaration for a Mixed Element content
6094 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006095 *
Owen Taylor3473f882001-02-23 17:55:21 +00006096 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6097 * '(' S? '#PCDATA' S? ')'
6098 *
6099 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6100 *
6101 * [ VC: No Duplicate Types ]
6102 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006103 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006104 *
6105 * returns: the list of the xmlElementContentPtr describing the element choices
6106 */
6107xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006108xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006109 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006110 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006111
6112 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006113 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006114 SKIP(7);
6115 SKIP_BLANKS;
6116 SHRINK;
6117 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006118 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006119 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6120"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006121 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006122 }
Owen Taylor3473f882001-02-23 17:55:21 +00006123 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006124 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006125 if (ret == NULL)
6126 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006127 if (RAW == '*') {
6128 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6129 NEXT;
6130 }
6131 return(ret);
6132 }
6133 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006134 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006135 if (ret == NULL) return(NULL);
6136 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006137 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006138 NEXT;
6139 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006140 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006141 if (ret == NULL) return(NULL);
6142 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006143 if (cur != NULL)
6144 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006145 cur = ret;
6146 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006147 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006148 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006149 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006150 if (n->c1 != NULL)
6151 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006152 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006153 if (n != NULL)
6154 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006155 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006156 }
6157 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006158 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006159 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006160 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006161 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006162 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006163 return(NULL);
6164 }
6165 SKIP_BLANKS;
6166 GROW;
6167 }
6168 if ((RAW == ')') && (NXT(1) == '*')) {
6169 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006170 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006171 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006172 if (cur->c2 != NULL)
6173 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006174 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006175 if (ret != NULL)
6176 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006177 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006178 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6179"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006180 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006181 }
Owen Taylor3473f882001-02-23 17:55:21 +00006182 SKIP(2);
6183 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006184 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006185 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006186 return(NULL);
6187 }
6188
6189 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006190 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006191 }
6192 return(ret);
6193}
6194
6195/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006196 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006197 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006198 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006199 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006200 *
6201 * parse the declaration for a Mixed Element content
6202 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006203 *
Owen Taylor3473f882001-02-23 17:55:21 +00006204 *
6205 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6206 *
6207 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6208 *
6209 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6210 *
6211 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6212 *
6213 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6214 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006215 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006216 * opening or closing parentheses in a choice, seq, or Mixed
6217 * construct is contained in the replacement text for a parameter
6218 * entity, both must be contained in the same replacement text. For
6219 * interoperability, if a parameter-entity reference appears in a
6220 * choice, seq, or Mixed construct, its replacement text should not
6221 * be empty, and neither the first nor last non-blank character of
6222 * the replacement text should be a connector (| or ,).
6223 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006224 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006225 * hierarchy.
6226 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006227static xmlElementContentPtr
6228xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6229 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006230 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006231 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006232 xmlChar type = 0;
6233
Daniel Veillard489f9672009-08-10 16:49:30 +02006234 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6235 (depth > 2048)) {
6236 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6237"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6238 depth);
6239 return(NULL);
6240 }
Owen Taylor3473f882001-02-23 17:55:21 +00006241 SKIP_BLANKS;
6242 GROW;
6243 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006244 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006245
Owen Taylor3473f882001-02-23 17:55:21 +00006246 /* Recurse on first child */
6247 NEXT;
6248 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006249 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6250 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006251 SKIP_BLANKS;
6252 GROW;
6253 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006254 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006255 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006256 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006257 return(NULL);
6258 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006259 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006260 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006261 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006262 return(NULL);
6263 }
Owen Taylor3473f882001-02-23 17:55:21 +00006264 GROW;
6265 if (RAW == '?') {
6266 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6267 NEXT;
6268 } else if (RAW == '*') {
6269 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6270 NEXT;
6271 } else if (RAW == '+') {
6272 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6273 NEXT;
6274 } else {
6275 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6276 }
Owen Taylor3473f882001-02-23 17:55:21 +00006277 GROW;
6278 }
6279 SKIP_BLANKS;
6280 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006281 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006282 /*
6283 * Each loop we parse one separator and one element.
6284 */
6285 if (RAW == ',') {
6286 if (type == 0) type = CUR;
6287
6288 /*
6289 * Detect "Name | Name , Name" error
6290 */
6291 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006292 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006293 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006294 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006295 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006296 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006297 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006298 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006299 return(NULL);
6300 }
6301 NEXT;
6302
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006303 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006304 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006305 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006306 xmlFreeDocElementContent(ctxt->myDoc, last);
6307 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006308 return(NULL);
6309 }
6310 if (last == NULL) {
6311 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006312 if (ret != NULL)
6313 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006314 ret = cur = op;
6315 } else {
6316 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006317 if (op != NULL)
6318 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006319 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006320 if (last != NULL)
6321 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006322 cur =op;
6323 last = NULL;
6324 }
6325 } else if (RAW == '|') {
6326 if (type == 0) type = CUR;
6327
6328 /*
6329 * Detect "Name , Name | Name" error
6330 */
6331 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006332 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006333 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006334 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006335 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006336 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006338 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006339 return(NULL);
6340 }
6341 NEXT;
6342
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006343 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006344 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006345 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006346 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006347 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006348 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006349 return(NULL);
6350 }
6351 if (last == NULL) {
6352 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006353 if (ret != NULL)
6354 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006355 ret = cur = op;
6356 } else {
6357 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006358 if (op != NULL)
6359 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006360 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006361 if (last != NULL)
6362 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006363 cur =op;
6364 last = NULL;
6365 }
6366 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006367 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006368 if ((last != NULL) && (last != ret))
6369 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006371 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006372 return(NULL);
6373 }
6374 GROW;
6375 SKIP_BLANKS;
6376 GROW;
6377 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006378 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006379 /* Recurse on second child */
6380 NEXT;
6381 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006382 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6383 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006384 SKIP_BLANKS;
6385 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006386 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006387 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006388 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006389 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006390 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006391 return(NULL);
6392 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006393 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006394 if (last == NULL) {
6395 if (ret != NULL)
6396 xmlFreeDocElementContent(ctxt->myDoc, ret);
6397 return(NULL);
6398 }
Owen Taylor3473f882001-02-23 17:55:21 +00006399 if (RAW == '?') {
6400 last->ocur = XML_ELEMENT_CONTENT_OPT;
6401 NEXT;
6402 } else if (RAW == '*') {
6403 last->ocur = XML_ELEMENT_CONTENT_MULT;
6404 NEXT;
6405 } else if (RAW == '+') {
6406 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6407 NEXT;
6408 } else {
6409 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6410 }
6411 }
6412 SKIP_BLANKS;
6413 GROW;
6414 }
6415 if ((cur != NULL) && (last != NULL)) {
6416 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006417 if (last != NULL)
6418 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006419 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006420 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006421 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6422"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006423 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006424 }
Owen Taylor3473f882001-02-23 17:55:21 +00006425 NEXT;
6426 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006427 if (ret != NULL) {
6428 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6429 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6430 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6431 else
6432 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6433 }
Owen Taylor3473f882001-02-23 17:55:21 +00006434 NEXT;
6435 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006436 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006437 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006438 cur = ret;
6439 /*
6440 * Some normalization:
6441 * (a | b* | c?)* == (a | b | c)*
6442 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006443 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006444 if ((cur->c1 != NULL) &&
6445 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6446 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6447 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6448 if ((cur->c2 != NULL) &&
6449 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6450 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6451 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6452 cur = cur->c2;
6453 }
6454 }
Owen Taylor3473f882001-02-23 17:55:21 +00006455 NEXT;
6456 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006457 if (ret != NULL) {
6458 int found = 0;
6459
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006460 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6461 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6462 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006463 else
6464 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006465 /*
6466 * Some normalization:
6467 * (a | b*)+ == (a | b)*
6468 * (a | b?)+ == (a | b)*
6469 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006470 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006471 if ((cur->c1 != NULL) &&
6472 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6473 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6474 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6475 found = 1;
6476 }
6477 if ((cur->c2 != NULL) &&
6478 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6479 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6480 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6481 found = 1;
6482 }
6483 cur = cur->c2;
6484 }
6485 if (found)
6486 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6487 }
Owen Taylor3473f882001-02-23 17:55:21 +00006488 NEXT;
6489 }
6490 return(ret);
6491}
6492
6493/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006494 * xmlParseElementChildrenContentDecl:
6495 * @ctxt: an XML parser context
6496 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006497 *
6498 * parse the declaration for a Mixed Element content
6499 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6500 *
6501 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6502 *
6503 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6504 *
6505 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6506 *
6507 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6508 *
6509 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6510 * TODO Parameter-entity replacement text must be properly nested
6511 * with parenthesized groups. That is to say, if either of the
6512 * opening or closing parentheses in a choice, seq, or Mixed
6513 * construct is contained in the replacement text for a parameter
6514 * entity, both must be contained in the same replacement text. For
6515 * interoperability, if a parameter-entity reference appears in a
6516 * choice, seq, or Mixed construct, its replacement text should not
6517 * be empty, and neither the first nor last non-blank character of
6518 * the replacement text should be a connector (| or ,).
6519 *
6520 * Returns the tree of xmlElementContentPtr describing the element
6521 * hierarchy.
6522 */
6523xmlElementContentPtr
6524xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6525 /* stub left for API/ABI compat */
6526 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6527}
6528
6529/**
Owen Taylor3473f882001-02-23 17:55:21 +00006530 * xmlParseElementContentDecl:
6531 * @ctxt: an XML parser context
6532 * @name: the name of the element being defined.
6533 * @result: the Element Content pointer will be stored here if any
6534 *
6535 * parse the declaration for an Element content either Mixed or Children,
6536 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006537 *
Owen Taylor3473f882001-02-23 17:55:21 +00006538 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6539 *
6540 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6541 */
6542
6543int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006544xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006545 xmlElementContentPtr *result) {
6546
6547 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006548 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006549 int res;
6550
6551 *result = NULL;
6552
6553 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006554 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006555 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006556 return(-1);
6557 }
6558 NEXT;
6559 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006560 if (ctxt->instate == XML_PARSER_EOF)
6561 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006562 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006563 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006564 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 res = XML_ELEMENT_TYPE_MIXED;
6566 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006567 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006568 res = XML_ELEMENT_TYPE_ELEMENT;
6569 }
Owen Taylor3473f882001-02-23 17:55:21 +00006570 SKIP_BLANKS;
6571 *result = tree;
6572 return(res);
6573}
6574
6575/**
6576 * xmlParseElementDecl:
6577 * @ctxt: an XML parser context
6578 *
6579 * parse an Element declaration.
6580 *
6581 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6582 *
6583 * [ VC: Unique Element Type Declaration ]
6584 * No element type may be declared more than once
6585 *
6586 * Returns the type of the element, or -1 in case of error
6587 */
6588int
6589xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006590 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006591 int ret = -1;
6592 xmlElementContentPtr content = NULL;
6593
Daniel Veillard4c778d82005-01-23 17:37:44 +00006594 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006595 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006596 xmlParserInputPtr input = ctxt->input;
6597
6598 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006599 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006600 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6601 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006602 }
6603 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006604 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006605 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006606 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6607 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006608 return(-1);
6609 }
6610 while ((RAW == 0) && (ctxt->inputNr > 1))
6611 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006612 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006613 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6614 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006615 }
6616 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006617 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006618 SKIP(5);
6619 /*
6620 * Element must always be empty.
6621 */
6622 ret = XML_ELEMENT_TYPE_EMPTY;
6623 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6624 (NXT(2) == 'Y')) {
6625 SKIP(3);
6626 /*
6627 * Element is a generic container.
6628 */
6629 ret = XML_ELEMENT_TYPE_ANY;
6630 } else if (RAW == '(') {
6631 ret = xmlParseElementContentDecl(ctxt, name, &content);
6632 } else {
6633 /*
6634 * [ WFC: PEs in Internal Subset ] error handling.
6635 */
6636 if ((RAW == '%') && (ctxt->external == 0) &&
6637 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006638 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006639 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006640 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006641 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006642 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6643 }
Owen Taylor3473f882001-02-23 17:55:21 +00006644 return(-1);
6645 }
6646
6647 SKIP_BLANKS;
6648 /*
6649 * Pop-up of finished entities.
6650 */
6651 while ((RAW == 0) && (ctxt->inputNr > 1))
6652 xmlPopInput(ctxt);
6653 SKIP_BLANKS;
6654
6655 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006656 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006657 if (content != NULL) {
6658 xmlFreeDocElementContent(ctxt->myDoc, content);
6659 }
Owen Taylor3473f882001-02-23 17:55:21 +00006660 } else {
6661 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006662 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6663 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006664 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006665
Owen Taylor3473f882001-02-23 17:55:21 +00006666 NEXT;
6667 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006668 (ctxt->sax->elementDecl != NULL)) {
6669 if (content != NULL)
6670 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006671 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6672 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006673 if ((content != NULL) && (content->parent == NULL)) {
6674 /*
6675 * this is a trick: if xmlAddElementDecl is called,
6676 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006677 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006678 * interfaces or change the API/ABI
6679 */
6680 xmlFreeDocElementContent(ctxt->myDoc, content);
6681 }
6682 } else if (content != NULL) {
6683 xmlFreeDocElementContent(ctxt->myDoc, content);
6684 }
Owen Taylor3473f882001-02-23 17:55:21 +00006685 }
Owen Taylor3473f882001-02-23 17:55:21 +00006686 }
6687 return(ret);
6688}
6689
6690/**
Owen Taylor3473f882001-02-23 17:55:21 +00006691 * xmlParseConditionalSections
6692 * @ctxt: an XML parser context
6693 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006694 * [61] conditionalSect ::= includeSect | ignoreSect
6695 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006696 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6697 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6698 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6699 */
6700
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006701static void
Owen Taylor3473f882001-02-23 17:55:21 +00006702xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006703 int id = ctxt->input->id;
6704
Owen Taylor3473f882001-02-23 17:55:21 +00006705 SKIP(3);
6706 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006707 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006708 SKIP(7);
6709 SKIP_BLANKS;
6710 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006711 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006712 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006713 if (ctxt->input->id != id) {
6714 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6715 "All markup of the conditional section is not in the same entity\n",
6716 NULL, NULL);
6717 }
Owen Taylor3473f882001-02-23 17:55:21 +00006718 NEXT;
6719 }
6720 if (xmlParserDebugEntities) {
6721 if ((ctxt->input != NULL) && (ctxt->input->filename))
6722 xmlGenericError(xmlGenericErrorContext,
6723 "%s(%d): ", ctxt->input->filename,
6724 ctxt->input->line);
6725 xmlGenericError(xmlGenericErrorContext,
6726 "Entering INCLUDE Conditional Section\n");
6727 }
6728
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006729 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6730 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006731 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006732 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006733
6734 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6735 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006736 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006737 NEXT;
6738 } else if (RAW == '%') {
6739 xmlParsePEReference(ctxt);
6740 } else
6741 xmlParseMarkupDecl(ctxt);
6742
6743 /*
6744 * Pop-up of finished entities.
6745 */
6746 while ((RAW == 0) && (ctxt->inputNr > 1))
6747 xmlPopInput(ctxt);
6748
Daniel Veillardfdc91562002-07-01 21:52:03 +00006749 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006750 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006751 break;
6752 }
6753 }
6754 if (xmlParserDebugEntities) {
6755 if ((ctxt->input != NULL) && (ctxt->input->filename))
6756 xmlGenericError(xmlGenericErrorContext,
6757 "%s(%d): ", ctxt->input->filename,
6758 ctxt->input->line);
6759 xmlGenericError(xmlGenericErrorContext,
6760 "Leaving INCLUDE Conditional Section\n");
6761 }
6762
Daniel Veillarda07050d2003-10-19 14:46:32 +00006763 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006764 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006765 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006766 int depth = 0;
6767
6768 SKIP(6);
6769 SKIP_BLANKS;
6770 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006771 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006772 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006773 if (ctxt->input->id != id) {
6774 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6775 "All markup of the conditional section is not in the same entity\n",
6776 NULL, NULL);
6777 }
Owen Taylor3473f882001-02-23 17:55:21 +00006778 NEXT;
6779 }
6780 if (xmlParserDebugEntities) {
6781 if ((ctxt->input != NULL) && (ctxt->input->filename))
6782 xmlGenericError(xmlGenericErrorContext,
6783 "%s(%d): ", ctxt->input->filename,
6784 ctxt->input->line);
6785 xmlGenericError(xmlGenericErrorContext,
6786 "Entering IGNORE Conditional Section\n");
6787 }
6788
6789 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006790 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006791 * But disable SAX event generating DTD building in the meantime
6792 */
6793 state = ctxt->disableSAX;
6794 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006795 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006796 ctxt->instate = XML_PARSER_IGNORE;
6797
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006798 while (((depth >= 0) && (RAW != 0)) &&
6799 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006800 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6801 depth++;
6802 SKIP(3);
6803 continue;
6804 }
6805 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6806 if (--depth >= 0) SKIP(3);
6807 continue;
6808 }
6809 NEXT;
6810 continue;
6811 }
6812
6813 ctxt->disableSAX = state;
6814 ctxt->instate = instate;
6815
6816 if (xmlParserDebugEntities) {
6817 if ((ctxt->input != NULL) && (ctxt->input->filename))
6818 xmlGenericError(xmlGenericErrorContext,
6819 "%s(%d): ", ctxt->input->filename,
6820 ctxt->input->line);
6821 xmlGenericError(xmlGenericErrorContext,
6822 "Leaving IGNORE Conditional Section\n");
6823 }
6824
6825 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006826 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006827 }
6828
6829 if (RAW == 0)
6830 SHRINK;
6831
6832 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006833 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006834 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006835 if (ctxt->input->id != id) {
6836 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6837 "All markup of the conditional section is not in the same entity\n",
6838 NULL, NULL);
6839 }
Owen Taylor3473f882001-02-23 17:55:21 +00006840 SKIP(3);
6841 }
6842}
6843
6844/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006845 * xmlParseMarkupDecl:
6846 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006847 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006848 * parse Markup declarations
6849 *
6850 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6851 * NotationDecl | PI | Comment
6852 *
6853 * [ VC: Proper Declaration/PE Nesting ]
6854 * Parameter-entity replacement text must be properly nested with
6855 * markup declarations. That is to say, if either the first character
6856 * or the last character of a markup declaration (markupdecl above) is
6857 * contained in the replacement text for a parameter-entity reference,
6858 * both must be contained in the same replacement text.
6859 *
6860 * [ WFC: PEs in Internal Subset ]
6861 * In the internal DTD subset, parameter-entity references can occur
6862 * only where markup declarations can occur, not within markup declarations.
6863 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006864 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006865 */
6866void
6867xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6868 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006869 if (CUR == '<') {
6870 if (NXT(1) == '!') {
6871 switch (NXT(2)) {
6872 case 'E':
6873 if (NXT(3) == 'L')
6874 xmlParseElementDecl(ctxt);
6875 else if (NXT(3) == 'N')
6876 xmlParseEntityDecl(ctxt);
6877 break;
6878 case 'A':
6879 xmlParseAttributeListDecl(ctxt);
6880 break;
6881 case 'N':
6882 xmlParseNotationDecl(ctxt);
6883 break;
6884 case '-':
6885 xmlParseComment(ctxt);
6886 break;
6887 default:
6888 /* there is an error but it will be detected later */
6889 break;
6890 }
6891 } else if (NXT(1) == '?') {
6892 xmlParsePI(ctxt);
6893 }
6894 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006895 /*
6896 * This is only for internal subset. On external entities,
6897 * the replacement is done before parsing stage
6898 */
6899 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6900 xmlParsePEReference(ctxt);
6901
6902 /*
6903 * Conditional sections are allowed from entities included
6904 * by PE References in the internal subset.
6905 */
6906 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6907 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6908 xmlParseConditionalSections(ctxt);
6909 }
6910 }
6911
6912 ctxt->instate = XML_PARSER_DTD;
6913}
6914
6915/**
6916 * xmlParseTextDecl:
6917 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006918 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006919 * parse an XML declaration header for external entities
6920 *
6921 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006922 */
6923
6924void
6925xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6926 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006927 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006928
6929 /*
6930 * We know that '<?xml' is here.
6931 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006932 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006933 SKIP(5);
6934 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006935 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006936 return;
6937 }
6938
William M. Brack76e95df2003-10-18 16:20:14 +00006939 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006940 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6941 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006942 }
6943 SKIP_BLANKS;
6944
6945 /*
6946 * We may have the VersionInfo here.
6947 */
6948 version = xmlParseVersionInfo(ctxt);
6949 if (version == NULL)
6950 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006951 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006952 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6954 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006955 }
6956 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006957 ctxt->input->version = version;
6958
6959 /*
6960 * We must have the encoding declaration
6961 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006962 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006963 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6964 /*
6965 * The XML REC instructs us to stop parsing right here
6966 */
6967 return;
6968 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006969 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6970 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6971 "Missing encoding in text declaration\n");
6972 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006973
6974 SKIP_BLANKS;
6975 if ((RAW == '?') && (NXT(1) == '>')) {
6976 SKIP(2);
6977 } else if (RAW == '>') {
6978 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006979 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006980 NEXT;
6981 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006982 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006983 MOVETO_ENDTAG(CUR_PTR);
6984 NEXT;
6985 }
6986}
6987
6988/**
Owen Taylor3473f882001-02-23 17:55:21 +00006989 * xmlParseExternalSubset:
6990 * @ctxt: an XML parser context
6991 * @ExternalID: the external identifier
6992 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006993 *
Owen Taylor3473f882001-02-23 17:55:21 +00006994 * parse Markup declarations from an external subset
6995 *
6996 * [30] extSubset ::= textDecl? extSubsetDecl
6997 *
6998 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6999 */
7000void
7001xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7002 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007003 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007004 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007005
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007006 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007007 (ctxt->input->end - ctxt->input->cur >= 4)) {
7008 xmlChar start[4];
7009 xmlCharEncoding enc;
7010
7011 start[0] = RAW;
7012 start[1] = NXT(1);
7013 start[2] = NXT(2);
7014 start[3] = NXT(3);
7015 enc = xmlDetectCharEncoding(start, 4);
7016 if (enc != XML_CHAR_ENCODING_NONE)
7017 xmlSwitchEncoding(ctxt, enc);
7018 }
7019
Daniel Veillarda07050d2003-10-19 14:46:32 +00007020 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007021 xmlParseTextDecl(ctxt);
7022 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7023 /*
7024 * The XML REC instructs us to stop parsing right here
7025 */
7026 ctxt->instate = XML_PARSER_EOF;
7027 return;
7028 }
7029 }
7030 if (ctxt->myDoc == NULL) {
7031 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007032 if (ctxt->myDoc == NULL) {
7033 xmlErrMemory(ctxt, "New Doc failed");
7034 return;
7035 }
7036 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007037 }
7038 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7039 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7040
7041 ctxt->instate = XML_PARSER_DTD;
7042 ctxt->external = 1;
7043 while (((RAW == '<') && (NXT(1) == '?')) ||
7044 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007045 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007046 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007047 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007048
7049 GROW;
7050 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7051 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007052 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007053 NEXT;
7054 } else if (RAW == '%') {
7055 xmlParsePEReference(ctxt);
7056 } else
7057 xmlParseMarkupDecl(ctxt);
7058
7059 /*
7060 * Pop-up of finished entities.
7061 */
7062 while ((RAW == 0) && (ctxt->inputNr > 1))
7063 xmlPopInput(ctxt);
7064
Daniel Veillardfdc91562002-07-01 21:52:03 +00007065 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007066 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007067 break;
7068 }
7069 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007070
Owen Taylor3473f882001-02-23 17:55:21 +00007071 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007072 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007073 }
7074
7075}
7076
7077/**
7078 * xmlParseReference:
7079 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007080 *
Owen Taylor3473f882001-02-23 17:55:21 +00007081 * parse and handle entity references in content, depending on the SAX
7082 * interface, this may end-up in a call to character() if this is a
7083 * CharRef, a predefined entity, if there is no reference() callback.
7084 * or if the parser was asked to switch to that mode.
7085 *
7086 * [67] Reference ::= EntityRef | CharRef
7087 */
7088void
7089xmlParseReference(xmlParserCtxtPtr ctxt) {
7090 xmlEntityPtr ent;
7091 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007092 int was_checked;
7093 xmlNodePtr list = NULL;
7094 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007095
Daniel Veillard0161e632008-08-28 15:36:32 +00007096
7097 if (RAW != '&')
7098 return;
7099
7100 /*
7101 * Simple case of a CharRef
7102 */
Owen Taylor3473f882001-02-23 17:55:21 +00007103 if (NXT(1) == '#') {
7104 int i = 0;
7105 xmlChar out[10];
7106 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007107 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007108
Daniel Veillarddc171602008-03-26 17:41:38 +00007109 if (value == 0)
7110 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007111 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7112 /*
7113 * So we are using non-UTF-8 buffers
7114 * Check that the char fit on 8bits, if not
7115 * generate a CharRef.
7116 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007117 if (value <= 0xFF) {
7118 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007119 out[1] = 0;
7120 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7121 (!ctxt->disableSAX))
7122 ctxt->sax->characters(ctxt->userData, out, 1);
7123 } else {
7124 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007125 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007126 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007127 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007128 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7129 (!ctxt->disableSAX))
7130 ctxt->sax->reference(ctxt->userData, out);
7131 }
7132 } else {
7133 /*
7134 * Just encode the value in UTF-8
7135 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007136 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007137 out[i] = 0;
7138 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7139 (!ctxt->disableSAX))
7140 ctxt->sax->characters(ctxt->userData, out, i);
7141 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007142 return;
7143 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007144
Daniel Veillard0161e632008-08-28 15:36:32 +00007145 /*
7146 * We are seeing an entity reference
7147 */
7148 ent = xmlParseEntityRef(ctxt);
7149 if (ent == NULL) return;
7150 if (!ctxt->wellFormed)
7151 return;
7152 was_checked = ent->checked;
7153
7154 /* special case of predefined entities */
7155 if ((ent->name == NULL) ||
7156 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7157 val = ent->content;
7158 if (val == NULL) return;
7159 /*
7160 * inline the entity.
7161 */
7162 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7163 (!ctxt->disableSAX))
7164 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7165 return;
7166 }
7167
7168 /*
7169 * The first reference to the entity trigger a parsing phase
7170 * where the ent->children is filled with the result from
7171 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007172 * Note: external parsed entities will not be loaded, it is not
7173 * required for a non-validating parser, unless the parsing option
7174 * of validating, or substituting entities were given. Doing so is
7175 * far more secure as the parser will only process data coming from
7176 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007177 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007178 if ((ent->checked == 0) &&
7179 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7180 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007181 unsigned long oldnbent = ctxt->nbentities;
7182
7183 /*
7184 * This is a bit hackish but this seems the best
7185 * way to make sure both SAX and DOM entity support
7186 * behaves okay.
7187 */
7188 void *user_data;
7189 if (ctxt->userData == ctxt)
7190 user_data = NULL;
7191 else
7192 user_data = ctxt->userData;
7193
7194 /*
7195 * Check that this entity is well formed
7196 * 4.3.2: An internal general parsed entity is well-formed
7197 * if its replacement text matches the production labeled
7198 * content.
7199 */
7200 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7201 ctxt->depth++;
7202 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7203 user_data, &list);
7204 ctxt->depth--;
7205
7206 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7207 ctxt->depth++;
7208 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7209 user_data, ctxt->depth, ent->URI,
7210 ent->ExternalID, &list);
7211 ctxt->depth--;
7212 } else {
7213 ret = XML_ERR_ENTITY_PE_INTERNAL;
7214 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7215 "invalid entity type found\n", NULL);
7216 }
7217
7218 /*
7219 * Store the number of entities needing parsing for this entity
7220 * content and do checkings
7221 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007222 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7223 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7224 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007225 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007226 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007227 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007228 return;
7229 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007230 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007231 xmlFreeNodeList(list);
7232 return;
7233 }
Owen Taylor3473f882001-02-23 17:55:21 +00007234
Daniel Veillard0161e632008-08-28 15:36:32 +00007235 if ((ret == XML_ERR_OK) && (list != NULL)) {
7236 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7237 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7238 (ent->children == NULL)) {
7239 ent->children = list;
7240 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007241 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007242 * Prune it directly in the generated document
7243 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007244 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007245 if (((list->type == XML_TEXT_NODE) &&
7246 (list->next == NULL)) ||
7247 (ctxt->parseMode == XML_PARSE_READER)) {
7248 list->parent = (xmlNodePtr) ent;
7249 list = NULL;
7250 ent->owner = 1;
7251 } else {
7252 ent->owner = 0;
7253 while (list != NULL) {
7254 list->parent = (xmlNodePtr) ctxt->node;
7255 list->doc = ctxt->myDoc;
7256 if (list->next == NULL)
7257 ent->last = list;
7258 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007259 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007260 list = ent->children;
7261#ifdef LIBXML_LEGACY_ENABLED
7262 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7263 xmlAddEntityReference(ent, list, NULL);
7264#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007265 }
7266 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007267 ent->owner = 1;
7268 while (list != NULL) {
7269 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007270 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007271 if (list->next == NULL)
7272 ent->last = list;
7273 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007274 }
7275 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007276 } else {
7277 xmlFreeNodeList(list);
7278 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007279 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007280 } else if ((ret != XML_ERR_OK) &&
7281 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7282 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7283 "Entity '%s' failed to parse\n", ent->name);
7284 } else if (list != NULL) {
7285 xmlFreeNodeList(list);
7286 list = NULL;
7287 }
7288 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007289 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007290 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007291 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007292 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007293
Daniel Veillard0161e632008-08-28 15:36:32 +00007294 /*
7295 * Now that the entity content has been gathered
7296 * provide it to the application, this can take different forms based
7297 * on the parsing modes.
7298 */
7299 if (ent->children == NULL) {
7300 /*
7301 * Probably running in SAX mode and the callbacks don't
7302 * build the entity content. So unless we already went
7303 * though parsing for first checking go though the entity
7304 * content to generate callbacks associated to the entity
7305 */
7306 if (was_checked != 0) {
7307 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007308 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007309 * This is a bit hackish but this seems the best
7310 * way to make sure both SAX and DOM entity support
7311 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007312 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007313 if (ctxt->userData == ctxt)
7314 user_data = NULL;
7315 else
7316 user_data = ctxt->userData;
7317
7318 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7319 ctxt->depth++;
7320 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7321 ent->content, user_data, NULL);
7322 ctxt->depth--;
7323 } else if (ent->etype ==
7324 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7325 ctxt->depth++;
7326 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7327 ctxt->sax, user_data, ctxt->depth,
7328 ent->URI, ent->ExternalID, NULL);
7329 ctxt->depth--;
7330 } else {
7331 ret = XML_ERR_ENTITY_PE_INTERNAL;
7332 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7333 "invalid entity type found\n", NULL);
7334 }
7335 if (ret == XML_ERR_ENTITY_LOOP) {
7336 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7337 return;
7338 }
7339 }
7340 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7341 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7342 /*
7343 * Entity reference callback comes second, it's somewhat
7344 * superfluous but a compatibility to historical behaviour
7345 */
7346 ctxt->sax->reference(ctxt->userData, ent->name);
7347 }
7348 return;
7349 }
7350
7351 /*
7352 * If we didn't get any children for the entity being built
7353 */
7354 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7355 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7356 /*
7357 * Create a node.
7358 */
7359 ctxt->sax->reference(ctxt->userData, ent->name);
7360 return;
7361 }
7362
7363 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7364 /*
7365 * There is a problem on the handling of _private for entities
7366 * (bug 155816): Should we copy the content of the field from
7367 * the entity (possibly overwriting some value set by the user
7368 * when a copy is created), should we leave it alone, or should
7369 * we try to take care of different situations? The problem
7370 * is exacerbated by the usage of this field by the xmlReader.
7371 * To fix this bug, we look at _private on the created node
7372 * and, if it's NULL, we copy in whatever was in the entity.
7373 * If it's not NULL we leave it alone. This is somewhat of a
7374 * hack - maybe we should have further tests to determine
7375 * what to do.
7376 */
7377 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7378 /*
7379 * Seems we are generating the DOM content, do
7380 * a simple tree copy for all references except the first
7381 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007382 */
7383 if (((list == NULL) && (ent->owner == 0)) ||
7384 (ctxt->parseMode == XML_PARSE_READER)) {
7385 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7386
7387 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007388 * We are copying here, make sure there is no abuse
7389 */
7390 ctxt->sizeentcopy += ent->length;
7391 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7392 return;
7393
7394 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007395 * when operating on a reader, the entities definitions
7396 * are always owning the entities subtree.
7397 if (ctxt->parseMode == XML_PARSE_READER)
7398 ent->owner = 1;
7399 */
7400
7401 cur = ent->children;
7402 while (cur != NULL) {
7403 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7404 if (nw != NULL) {
7405 if (nw->_private == NULL)
7406 nw->_private = cur->_private;
7407 if (firstChild == NULL){
7408 firstChild = nw;
7409 }
7410 nw = xmlAddChild(ctxt->node, nw);
7411 }
7412 if (cur == ent->last) {
7413 /*
7414 * needed to detect some strange empty
7415 * node cases in the reader tests
7416 */
7417 if ((ctxt->parseMode == XML_PARSE_READER) &&
7418 (nw != NULL) &&
7419 (nw->type == XML_ELEMENT_NODE) &&
7420 (nw->children == NULL))
7421 nw->extra = 1;
7422
7423 break;
7424 }
7425 cur = cur->next;
7426 }
7427#ifdef LIBXML_LEGACY_ENABLED
7428 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7429 xmlAddEntityReference(ent, firstChild, nw);
7430#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007431 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007432 xmlNodePtr nw = NULL, cur, next, last,
7433 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007434
7435 /*
7436 * We are copying here, make sure there is no abuse
7437 */
7438 ctxt->sizeentcopy += ent->length;
7439 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7440 return;
7441
Daniel Veillard0161e632008-08-28 15:36:32 +00007442 /*
7443 * Copy the entity child list and make it the new
7444 * entity child list. The goal is to make sure any
7445 * ID or REF referenced will be the one from the
7446 * document content and not the entity copy.
7447 */
7448 cur = ent->children;
7449 ent->children = NULL;
7450 last = ent->last;
7451 ent->last = NULL;
7452 while (cur != NULL) {
7453 next = cur->next;
7454 cur->next = NULL;
7455 cur->parent = NULL;
7456 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7457 if (nw != NULL) {
7458 if (nw->_private == NULL)
7459 nw->_private = cur->_private;
7460 if (firstChild == NULL){
7461 firstChild = cur;
7462 }
7463 xmlAddChild((xmlNodePtr) ent, nw);
7464 xmlAddChild(ctxt->node, cur);
7465 }
7466 if (cur == last)
7467 break;
7468 cur = next;
7469 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007470 if (ent->owner == 0)
7471 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007472#ifdef LIBXML_LEGACY_ENABLED
7473 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7474 xmlAddEntityReference(ent, firstChild, nw);
7475#endif /* LIBXML_LEGACY_ENABLED */
7476 } else {
7477 const xmlChar *nbktext;
7478
7479 /*
7480 * the name change is to avoid coalescing of the
7481 * node with a possible previous text one which
7482 * would make ent->children a dangling pointer
7483 */
7484 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7485 -1);
7486 if (ent->children->type == XML_TEXT_NODE)
7487 ent->children->name = nbktext;
7488 if ((ent->last != ent->children) &&
7489 (ent->last->type == XML_TEXT_NODE))
7490 ent->last->name = nbktext;
7491 xmlAddChildList(ctxt->node, ent->children);
7492 }
7493
7494 /*
7495 * This is to avoid a nasty side effect, see
7496 * characters() in SAX.c
7497 */
7498 ctxt->nodemem = 0;
7499 ctxt->nodelen = 0;
7500 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007501 }
7502 }
7503}
7504
7505/**
7506 * xmlParseEntityRef:
7507 * @ctxt: an XML parser context
7508 *
7509 * parse ENTITY references declarations
7510 *
7511 * [68] EntityRef ::= '&' Name ';'
7512 *
7513 * [ WFC: Entity Declared ]
7514 * In a document without any DTD, a document with only an internal DTD
7515 * subset which contains no parameter entity references, or a document
7516 * with "standalone='yes'", the Name given in the entity reference
7517 * must match that in an entity declaration, except that well-formed
7518 * documents need not declare any of the following entities: amp, lt,
7519 * gt, apos, quot. The declaration of a parameter entity must precede
7520 * any reference to it. Similarly, the declaration of a general entity
7521 * must precede any reference to it which appears in a default value in an
7522 * attribute-list declaration. Note that if entities are declared in the
7523 * external subset or in external parameter entities, a non-validating
7524 * processor is not obligated to read and process their declarations;
7525 * for such documents, the rule that an entity must be declared is a
7526 * well-formedness constraint only if standalone='yes'.
7527 *
7528 * [ WFC: Parsed Entity ]
7529 * An entity reference must not contain the name of an unparsed entity
7530 *
7531 * Returns the xmlEntityPtr if found, or NULL otherwise.
7532 */
7533xmlEntityPtr
7534xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007535 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007536 xmlEntityPtr ent = NULL;
7537
7538 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007539 if (ctxt->instate == XML_PARSER_EOF)
7540 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007541
Daniel Veillard0161e632008-08-28 15:36:32 +00007542 if (RAW != '&')
7543 return(NULL);
7544 NEXT;
7545 name = xmlParseName(ctxt);
7546 if (name == NULL) {
7547 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7548 "xmlParseEntityRef: no name\n");
7549 return(NULL);
7550 }
7551 if (RAW != ';') {
7552 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7553 return(NULL);
7554 }
7555 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007556
Daniel Veillard0161e632008-08-28 15:36:32 +00007557 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007558 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007559 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007560 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7561 ent = xmlGetPredefinedEntity(name);
7562 if (ent != NULL)
7563 return(ent);
7564 }
Owen Taylor3473f882001-02-23 17:55:21 +00007565
Daniel Veillard0161e632008-08-28 15:36:32 +00007566 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007567 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007568 */
7569 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007570
Daniel Veillard0161e632008-08-28 15:36:32 +00007571 /*
7572 * Ask first SAX for entity resolution, otherwise try the
7573 * entities which may have stored in the parser context.
7574 */
7575 if (ctxt->sax != NULL) {
7576 if (ctxt->sax->getEntity != NULL)
7577 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007578 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007579 (ctxt->options & XML_PARSE_OLDSAX))
7580 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007581 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7582 (ctxt->userData==ctxt)) {
7583 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007584 }
7585 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007586 /*
7587 * [ WFC: Entity Declared ]
7588 * In a document without any DTD, a document with only an
7589 * internal DTD subset which contains no parameter entity
7590 * references, or a document with "standalone='yes'", the
7591 * Name given in the entity reference must match that in an
7592 * entity declaration, except that well-formed documents
7593 * need not declare any of the following entities: amp, lt,
7594 * gt, apos, quot.
7595 * The declaration of a parameter entity must precede any
7596 * reference to it.
7597 * Similarly, the declaration of a general entity must
7598 * precede any reference to it which appears in a default
7599 * value in an attribute-list declaration. Note that if
7600 * entities are declared in the external subset or in
7601 * external parameter entities, a non-validating processor
7602 * is not obligated to read and process their declarations;
7603 * for such documents, the rule that an entity must be
7604 * declared is a well-formedness constraint only if
7605 * standalone='yes'.
7606 */
7607 if (ent == NULL) {
7608 if ((ctxt->standalone == 1) ||
7609 ((ctxt->hasExternalSubset == 0) &&
7610 (ctxt->hasPErefs == 0))) {
7611 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7612 "Entity '%s' not defined\n", name);
7613 } else {
7614 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7615 "Entity '%s' not defined\n", name);
7616 if ((ctxt->inSubset == 0) &&
7617 (ctxt->sax != NULL) &&
7618 (ctxt->sax->reference != NULL)) {
7619 ctxt->sax->reference(ctxt->userData, name);
7620 }
7621 }
7622 ctxt->valid = 0;
7623 }
7624
7625 /*
7626 * [ WFC: Parsed Entity ]
7627 * An entity reference must not contain the name of an
7628 * unparsed entity
7629 */
7630 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7631 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7632 "Entity reference to unparsed entity %s\n", name);
7633 }
7634
7635 /*
7636 * [ WFC: No External Entity References ]
7637 * Attribute values cannot contain direct or indirect
7638 * entity references to external entities.
7639 */
7640 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7641 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7642 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7643 "Attribute references external entity '%s'\n", name);
7644 }
7645 /*
7646 * [ WFC: No < in Attribute Values ]
7647 * The replacement text of any entity referred to directly or
7648 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007649 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007650 */
7651 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007652 (ent != NULL) &&
7653 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7654 if ((ent->checked & 1) || ((ent->checked == 0) &&
7655 (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7656 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7657 "'<' in entity '%s' is not allowed in attributes values\n", name);
7658 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007659 }
7660
7661 /*
7662 * Internal check, no parameter entities here ...
7663 */
7664 else {
7665 switch (ent->etype) {
7666 case XML_INTERNAL_PARAMETER_ENTITY:
7667 case XML_EXTERNAL_PARAMETER_ENTITY:
7668 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7669 "Attempt to reference the parameter entity '%s'\n",
7670 name);
7671 break;
7672 default:
7673 break;
7674 }
7675 }
7676
7677 /*
7678 * [ WFC: No Recursion ]
7679 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007680 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007681 * Done somewhere else
7682 */
Owen Taylor3473f882001-02-23 17:55:21 +00007683 return(ent);
7684}
7685
7686/**
7687 * xmlParseStringEntityRef:
7688 * @ctxt: an XML parser context
7689 * @str: a pointer to an index in the string
7690 *
7691 * parse ENTITY references declarations, but this version parses it from
7692 * a string value.
7693 *
7694 * [68] EntityRef ::= '&' Name ';'
7695 *
7696 * [ WFC: Entity Declared ]
7697 * In a document without any DTD, a document with only an internal DTD
7698 * subset which contains no parameter entity references, or a document
7699 * with "standalone='yes'", the Name given in the entity reference
7700 * must match that in an entity declaration, except that well-formed
7701 * documents need not declare any of the following entities: amp, lt,
7702 * gt, apos, quot. The declaration of a parameter entity must precede
7703 * any reference to it. Similarly, the declaration of a general entity
7704 * must precede any reference to it which appears in a default value in an
7705 * attribute-list declaration. Note that if entities are declared in the
7706 * external subset or in external parameter entities, a non-validating
7707 * processor is not obligated to read and process their declarations;
7708 * for such documents, the rule that an entity must be declared is a
7709 * well-formedness constraint only if standalone='yes'.
7710 *
7711 * [ WFC: Parsed Entity ]
7712 * An entity reference must not contain the name of an unparsed entity
7713 *
7714 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7715 * is updated to the current location in the string.
7716 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007717static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007718xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7719 xmlChar *name;
7720 const xmlChar *ptr;
7721 xmlChar cur;
7722 xmlEntityPtr ent = NULL;
7723
7724 if ((str == NULL) || (*str == NULL))
7725 return(NULL);
7726 ptr = *str;
7727 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007728 if (cur != '&')
7729 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007730
Daniel Veillard0161e632008-08-28 15:36:32 +00007731 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007732 name = xmlParseStringName(ctxt, &ptr);
7733 if (name == NULL) {
7734 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7735 "xmlParseStringEntityRef: no name\n");
7736 *str = ptr;
7737 return(NULL);
7738 }
7739 if (*ptr != ';') {
7740 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007741 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007742 *str = ptr;
7743 return(NULL);
7744 }
7745 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007746
Owen Taylor3473f882001-02-23 17:55:21 +00007747
Daniel Veillard0161e632008-08-28 15:36:32 +00007748 /*
7749 * Predefined entites override any extra definition
7750 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007751 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7752 ent = xmlGetPredefinedEntity(name);
7753 if (ent != NULL) {
7754 xmlFree(name);
7755 *str = ptr;
7756 return(ent);
7757 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007758 }
Owen Taylor3473f882001-02-23 17:55:21 +00007759
Daniel Veillard0161e632008-08-28 15:36:32 +00007760 /*
7761 * Increate the number of entity references parsed
7762 */
7763 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007764
Daniel Veillard0161e632008-08-28 15:36:32 +00007765 /*
7766 * Ask first SAX for entity resolution, otherwise try the
7767 * entities which may have stored in the parser context.
7768 */
7769 if (ctxt->sax != NULL) {
7770 if (ctxt->sax->getEntity != NULL)
7771 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007772 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7773 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007774 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7775 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007776 }
7777 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007778
7779 /*
7780 * [ WFC: Entity Declared ]
7781 * In a document without any DTD, a document with only an
7782 * internal DTD subset which contains no parameter entity
7783 * references, or a document with "standalone='yes'", the
7784 * Name given in the entity reference must match that in an
7785 * entity declaration, except that well-formed documents
7786 * need not declare any of the following entities: amp, lt,
7787 * gt, apos, quot.
7788 * The declaration of a parameter entity must precede any
7789 * reference to it.
7790 * Similarly, the declaration of a general entity must
7791 * precede any reference to it which appears in a default
7792 * value in an attribute-list declaration. Note that if
7793 * entities are declared in the external subset or in
7794 * external parameter entities, a non-validating processor
7795 * is not obligated to read and process their declarations;
7796 * for such documents, the rule that an entity must be
7797 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007798 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007799 */
7800 if (ent == NULL) {
7801 if ((ctxt->standalone == 1) ||
7802 ((ctxt->hasExternalSubset == 0) &&
7803 (ctxt->hasPErefs == 0))) {
7804 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7805 "Entity '%s' not defined\n", name);
7806 } else {
7807 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7808 "Entity '%s' not defined\n",
7809 name);
7810 }
7811 /* TODO ? check regressions ctxt->valid = 0; */
7812 }
7813
7814 /*
7815 * [ WFC: Parsed Entity ]
7816 * An entity reference must not contain the name of an
7817 * unparsed entity
7818 */
7819 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7820 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7821 "Entity reference to unparsed entity %s\n", name);
7822 }
7823
7824 /*
7825 * [ WFC: No External Entity References ]
7826 * Attribute values cannot contain direct or indirect
7827 * entity references to external entities.
7828 */
7829 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7830 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7831 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7832 "Attribute references external entity '%s'\n", name);
7833 }
7834 /*
7835 * [ WFC: No < in Attribute Values ]
7836 * The replacement text of any entity referred to directly or
7837 * indirectly in an attribute value (other than "&lt;") must
7838 * not contain a <.
7839 */
7840 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7841 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007842 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007843 (xmlStrchr(ent->content, '<'))) {
7844 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7845 "'<' in entity '%s' is not allowed in attributes values\n",
7846 name);
7847 }
7848
7849 /*
7850 * Internal check, no parameter entities here ...
7851 */
7852 else {
7853 switch (ent->etype) {
7854 case XML_INTERNAL_PARAMETER_ENTITY:
7855 case XML_EXTERNAL_PARAMETER_ENTITY:
7856 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7857 "Attempt to reference the parameter entity '%s'\n",
7858 name);
7859 break;
7860 default:
7861 break;
7862 }
7863 }
7864
7865 /*
7866 * [ WFC: No Recursion ]
7867 * A parsed entity must not contain a recursive reference
7868 * to itself, either directly or indirectly.
7869 * Done somewhere else
7870 */
7871
7872 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007873 *str = ptr;
7874 return(ent);
7875}
7876
7877/**
7878 * xmlParsePEReference:
7879 * @ctxt: an XML parser context
7880 *
7881 * parse PEReference declarations
7882 * The entity content is handled directly by pushing it's content as
7883 * a new input stream.
7884 *
7885 * [69] PEReference ::= '%' Name ';'
7886 *
7887 * [ WFC: No Recursion ]
7888 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007889 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007890 *
7891 * [ WFC: Entity Declared ]
7892 * In a document without any DTD, a document with only an internal DTD
7893 * subset which contains no parameter entity references, or a document
7894 * with "standalone='yes'", ... ... The declaration of a parameter
7895 * entity must precede any reference to it...
7896 *
7897 * [ VC: Entity Declared ]
7898 * In a document with an external subset or external parameter entities
7899 * with "standalone='no'", ... ... The declaration of a parameter entity
7900 * must precede any reference to it...
7901 *
7902 * [ WFC: In DTD ]
7903 * Parameter-entity references may only appear in the DTD.
7904 * NOTE: misleading but this is handled.
7905 */
7906void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007907xmlParsePEReference(xmlParserCtxtPtr ctxt)
7908{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007909 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007910 xmlEntityPtr entity = NULL;
7911 xmlParserInputPtr input;
7912
Daniel Veillard0161e632008-08-28 15:36:32 +00007913 if (RAW != '%')
7914 return;
7915 NEXT;
7916 name = xmlParseName(ctxt);
7917 if (name == NULL) {
7918 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7919 "xmlParsePEReference: no name\n");
7920 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007921 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007922 if (RAW != ';') {
7923 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7924 return;
7925 }
7926
7927 NEXT;
7928
7929 /*
7930 * Increate the number of entity references parsed
7931 */
7932 ctxt->nbentities++;
7933
7934 /*
7935 * Request the entity from SAX
7936 */
7937 if ((ctxt->sax != NULL) &&
7938 (ctxt->sax->getParameterEntity != NULL))
7939 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7940 name);
7941 if (entity == NULL) {
7942 /*
7943 * [ WFC: Entity Declared ]
7944 * In a document without any DTD, a document with only an
7945 * internal DTD subset which contains no parameter entity
7946 * references, or a document with "standalone='yes'", ...
7947 * ... The declaration of a parameter entity must precede
7948 * any reference to it...
7949 */
7950 if ((ctxt->standalone == 1) ||
7951 ((ctxt->hasExternalSubset == 0) &&
7952 (ctxt->hasPErefs == 0))) {
7953 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7954 "PEReference: %%%s; not found\n",
7955 name);
7956 } else {
7957 /*
7958 * [ VC: Entity Declared ]
7959 * In a document with an external subset or external
7960 * parameter entities with "standalone='no'", ...
7961 * ... The declaration of a parameter entity must
7962 * precede any reference to it...
7963 */
7964 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7965 "PEReference: %%%s; not found\n",
7966 name, NULL);
7967 ctxt->valid = 0;
7968 }
7969 } else {
7970 /*
7971 * Internal checking in case the entity quest barfed
7972 */
7973 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7974 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7975 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7976 "Internal: %%%s; is not a parameter entity\n",
7977 name, NULL);
7978 } else if (ctxt->input->free != deallocblankswrapper) {
7979 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7980 if (xmlPushInput(ctxt, input) < 0)
7981 return;
7982 } else {
7983 /*
7984 * TODO !!!
7985 * handle the extra spaces added before and after
7986 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7987 */
7988 input = xmlNewEntityInputStream(ctxt, entity);
7989 if (xmlPushInput(ctxt, input) < 0)
7990 return;
7991 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7992 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7993 (IS_BLANK_CH(NXT(5)))) {
7994 xmlParseTextDecl(ctxt);
7995 if (ctxt->errNo ==
7996 XML_ERR_UNSUPPORTED_ENCODING) {
7997 /*
7998 * The XML REC instructs us to stop parsing
7999 * right here
8000 */
8001 ctxt->instate = XML_PARSER_EOF;
8002 return;
8003 }
8004 }
8005 }
8006 }
8007 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008008}
8009
8010/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008011 * xmlLoadEntityContent:
8012 * @ctxt: an XML parser context
8013 * @entity: an unloaded system entity
8014 *
8015 * Load the original content of the given system entity from the
8016 * ExternalID/SystemID given. This is to be used for Included in Literal
8017 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8018 *
8019 * Returns 0 in case of success and -1 in case of failure
8020 */
8021static int
8022xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8023 xmlParserInputPtr input;
8024 xmlBufferPtr buf;
8025 int l, c;
8026 int count = 0;
8027
8028 if ((ctxt == NULL) || (entity == NULL) ||
8029 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8030 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8031 (entity->content != NULL)) {
8032 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8033 "xmlLoadEntityContent parameter error");
8034 return(-1);
8035 }
8036
8037 if (xmlParserDebugEntities)
8038 xmlGenericError(xmlGenericErrorContext,
8039 "Reading %s entity content input\n", entity->name);
8040
8041 buf = xmlBufferCreate();
8042 if (buf == NULL) {
8043 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8044 "xmlLoadEntityContent parameter error");
8045 return(-1);
8046 }
8047
8048 input = xmlNewEntityInputStream(ctxt, entity);
8049 if (input == NULL) {
8050 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8051 "xmlLoadEntityContent input error");
8052 xmlBufferFree(buf);
8053 return(-1);
8054 }
8055
8056 /*
8057 * Push the entity as the current input, read char by char
8058 * saving to the buffer until the end of the entity or an error
8059 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008060 if (xmlPushInput(ctxt, input) < 0) {
8061 xmlBufferFree(buf);
8062 return(-1);
8063 }
8064
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008065 GROW;
8066 c = CUR_CHAR(l);
8067 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8068 (IS_CHAR(c))) {
8069 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008070 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008071 count = 0;
8072 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008073 if (ctxt->instate == XML_PARSER_EOF) {
8074 xmlBufferFree(buf);
8075 return(-1);
8076 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008077 }
8078 NEXTL(l);
8079 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008080 if (c == 0) {
8081 count = 0;
8082 GROW;
8083 if (ctxt->instate == XML_PARSER_EOF) {
8084 xmlBufferFree(buf);
8085 return(-1);
8086 }
8087 c = CUR_CHAR(l);
8088 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008089 }
8090
8091 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8092 xmlPopInput(ctxt);
8093 } else if (!IS_CHAR(c)) {
8094 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8095 "xmlLoadEntityContent: invalid char value %d\n",
8096 c);
8097 xmlBufferFree(buf);
8098 return(-1);
8099 }
8100 entity->content = buf->content;
8101 buf->content = NULL;
8102 xmlBufferFree(buf);
8103
8104 return(0);
8105}
8106
8107/**
Owen Taylor3473f882001-02-23 17:55:21 +00008108 * xmlParseStringPEReference:
8109 * @ctxt: an XML parser context
8110 * @str: a pointer to an index in the string
8111 *
8112 * parse PEReference declarations
8113 *
8114 * [69] PEReference ::= '%' Name ';'
8115 *
8116 * [ WFC: No Recursion ]
8117 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008118 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008119 *
8120 * [ WFC: Entity Declared ]
8121 * In a document without any DTD, a document with only an internal DTD
8122 * subset which contains no parameter entity references, or a document
8123 * with "standalone='yes'", ... ... The declaration of a parameter
8124 * entity must precede any reference to it...
8125 *
8126 * [ VC: Entity Declared ]
8127 * In a document with an external subset or external parameter entities
8128 * with "standalone='no'", ... ... The declaration of a parameter entity
8129 * must precede any reference to it...
8130 *
8131 * [ WFC: In DTD ]
8132 * Parameter-entity references may only appear in the DTD.
8133 * NOTE: misleading but this is handled.
8134 *
8135 * Returns the string of the entity content.
8136 * str is updated to the current value of the index
8137 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008138static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008139xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8140 const xmlChar *ptr;
8141 xmlChar cur;
8142 xmlChar *name;
8143 xmlEntityPtr entity = NULL;
8144
8145 if ((str == NULL) || (*str == NULL)) return(NULL);
8146 ptr = *str;
8147 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008148 if (cur != '%')
8149 return(NULL);
8150 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008151 name = xmlParseStringName(ctxt, &ptr);
8152 if (name == NULL) {
8153 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8154 "xmlParseStringPEReference: no name\n");
8155 *str = ptr;
8156 return(NULL);
8157 }
8158 cur = *ptr;
8159 if (cur != ';') {
8160 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8161 xmlFree(name);
8162 *str = ptr;
8163 return(NULL);
8164 }
8165 ptr++;
8166
8167 /*
8168 * Increate the number of entity references parsed
8169 */
8170 ctxt->nbentities++;
8171
8172 /*
8173 * Request the entity from SAX
8174 */
8175 if ((ctxt->sax != NULL) &&
8176 (ctxt->sax->getParameterEntity != NULL))
8177 entity = ctxt->sax->getParameterEntity(ctxt->userData,
8178 name);
8179 if (entity == NULL) {
8180 /*
8181 * [ WFC: Entity Declared ]
8182 * In a document without any DTD, a document with only an
8183 * internal DTD subset which contains no parameter entity
8184 * references, or a document with "standalone='yes'", ...
8185 * ... The declaration of a parameter entity must precede
8186 * any reference to it...
8187 */
8188 if ((ctxt->standalone == 1) ||
8189 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8190 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8191 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008192 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008193 /*
8194 * [ VC: Entity Declared ]
8195 * In a document with an external subset or external
8196 * parameter entities with "standalone='no'", ...
8197 * ... The declaration of a parameter entity must
8198 * precede any reference to it...
8199 */
8200 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8201 "PEReference: %%%s; not found\n",
8202 name, NULL);
8203 ctxt->valid = 0;
8204 }
8205 } else {
8206 /*
8207 * Internal checking in case the entity quest barfed
8208 */
8209 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8210 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8211 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8212 "%%%s; is not a parameter entity\n",
8213 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008214 }
8215 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008216 ctxt->hasPErefs = 1;
8217 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008218 *str = ptr;
8219 return(entity);
8220}
8221
8222/**
8223 * xmlParseDocTypeDecl:
8224 * @ctxt: an XML parser context
8225 *
8226 * parse a DOCTYPE declaration
8227 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008228 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008229 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8230 *
8231 * [ VC: Root Element Type ]
8232 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008233 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008234 */
8235
8236void
8237xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008238 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008239 xmlChar *ExternalID = NULL;
8240 xmlChar *URI = NULL;
8241
8242 /*
8243 * We know that '<!DOCTYPE' has been detected.
8244 */
8245 SKIP(9);
8246
8247 SKIP_BLANKS;
8248
8249 /*
8250 * Parse the DOCTYPE name.
8251 */
8252 name = xmlParseName(ctxt);
8253 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008254 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8255 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008256 }
8257 ctxt->intSubName = name;
8258
8259 SKIP_BLANKS;
8260
8261 /*
8262 * Check for SystemID and ExternalID
8263 */
8264 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8265
8266 if ((URI != NULL) || (ExternalID != NULL)) {
8267 ctxt->hasExternalSubset = 1;
8268 }
8269 ctxt->extSubURI = URI;
8270 ctxt->extSubSystem = ExternalID;
8271
8272 SKIP_BLANKS;
8273
8274 /*
8275 * Create and update the internal subset.
8276 */
8277 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8278 (!ctxt->disableSAX))
8279 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8280
8281 /*
8282 * Is there any internal subset declarations ?
8283 * they are handled separately in xmlParseInternalSubset()
8284 */
8285 if (RAW == '[')
8286 return;
8287
8288 /*
8289 * We should be at the end of the DOCTYPE declaration.
8290 */
8291 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008292 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008293 }
8294 NEXT;
8295}
8296
8297/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008298 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008299 * @ctxt: an XML parser context
8300 *
8301 * parse the internal subset declaration
8302 *
8303 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8304 */
8305
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008306static void
Owen Taylor3473f882001-02-23 17:55:21 +00008307xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8308 /*
8309 * Is there any DTD definition ?
8310 */
8311 if (RAW == '[') {
8312 ctxt->instate = XML_PARSER_DTD;
8313 NEXT;
8314 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008315 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008316 * PEReferences.
8317 * Subsequence (markupdecl | PEReference | S)*
8318 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008319 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008320 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008321 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008322
8323 SKIP_BLANKS;
8324 xmlParseMarkupDecl(ctxt);
8325 xmlParsePEReference(ctxt);
8326
8327 /*
8328 * Pop-up of finished entities.
8329 */
8330 while ((RAW == 0) && (ctxt->inputNr > 1))
8331 xmlPopInput(ctxt);
8332
8333 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008334 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008335 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008336 break;
8337 }
8338 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008339 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008340 NEXT;
8341 SKIP_BLANKS;
8342 }
8343 }
8344
8345 /*
8346 * We should be at the end of the DOCTYPE declaration.
8347 */
8348 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008349 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008350 }
8351 NEXT;
8352}
8353
Daniel Veillard81273902003-09-30 00:43:48 +00008354#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008355/**
8356 * xmlParseAttribute:
8357 * @ctxt: an XML parser context
8358 * @value: a xmlChar ** used to store the value of the attribute
8359 *
8360 * parse an attribute
8361 *
8362 * [41] Attribute ::= Name Eq AttValue
8363 *
8364 * [ WFC: No External Entity References ]
8365 * Attribute values cannot contain direct or indirect entity references
8366 * to external entities.
8367 *
8368 * [ WFC: No < in Attribute Values ]
8369 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008370 * an attribute value (other than "&lt;") must not contain a <.
8371 *
Owen Taylor3473f882001-02-23 17:55:21 +00008372 * [ VC: Attribute Value Type ]
8373 * The attribute must have been declared; the value must be of the type
8374 * declared for it.
8375 *
8376 * [25] Eq ::= S? '=' S?
8377 *
8378 * With namespace:
8379 *
8380 * [NS 11] Attribute ::= QName Eq AttValue
8381 *
8382 * Also the case QName == xmlns:??? is handled independently as a namespace
8383 * definition.
8384 *
8385 * Returns the attribute name, and the value in *value.
8386 */
8387
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008388const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008389xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008390 const xmlChar *name;
8391 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008392
8393 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008394 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008395 name = xmlParseName(ctxt);
8396 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008397 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008398 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008399 return(NULL);
8400 }
8401
8402 /*
8403 * read the value
8404 */
8405 SKIP_BLANKS;
8406 if (RAW == '=') {
8407 NEXT;
8408 SKIP_BLANKS;
8409 val = xmlParseAttValue(ctxt);
8410 ctxt->instate = XML_PARSER_CONTENT;
8411 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008412 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008413 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008414 return(NULL);
8415 }
8416
8417 /*
8418 * Check that xml:lang conforms to the specification
8419 * No more registered as an error, just generate a warning now
8420 * since this was deprecated in XML second edition
8421 */
8422 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8423 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008424 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8425 "Malformed value for xml:lang : %s\n",
8426 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008427 }
8428 }
8429
8430 /*
8431 * Check that xml:space conforms to the specification
8432 */
8433 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8434 if (xmlStrEqual(val, BAD_CAST "default"))
8435 *(ctxt->space) = 0;
8436 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8437 *(ctxt->space) = 1;
8438 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008439 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008440"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008441 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008442 }
8443 }
8444
8445 *value = val;
8446 return(name);
8447}
8448
8449/**
8450 * xmlParseStartTag:
8451 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008452 *
Owen Taylor3473f882001-02-23 17:55:21 +00008453 * parse a start of tag either for rule element or
8454 * EmptyElement. In both case we don't parse the tag closing chars.
8455 *
8456 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8457 *
8458 * [ WFC: Unique Att Spec ]
8459 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008460 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008461 *
8462 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8463 *
8464 * [ WFC: Unique Att Spec ]
8465 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008466 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008467 *
8468 * With namespace:
8469 *
8470 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8471 *
8472 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8473 *
8474 * Returns the element name parsed
8475 */
8476
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008477const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008478xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008479 const xmlChar *name;
8480 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008481 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008482 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008483 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008484 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008485 int i;
8486
8487 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008488 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008489
8490 name = xmlParseName(ctxt);
8491 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008492 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008493 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008494 return(NULL);
8495 }
8496
8497 /*
8498 * Now parse the attributes, it ends up with the ending
8499 *
8500 * (S Attribute)* S?
8501 */
8502 SKIP_BLANKS;
8503 GROW;
8504
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008505 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008506 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008507 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008508 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008509 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008510
8511 attname = xmlParseAttribute(ctxt, &attvalue);
8512 if ((attname != NULL) && (attvalue != NULL)) {
8513 /*
8514 * [ WFC: Unique Att Spec ]
8515 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008516 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008517 */
8518 for (i = 0; i < nbatts;i += 2) {
8519 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008520 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008521 xmlFree(attvalue);
8522 goto failed;
8523 }
8524 }
Owen Taylor3473f882001-02-23 17:55:21 +00008525 /*
8526 * Add the pair to atts
8527 */
8528 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008529 maxatts = 22; /* allow for 10 attrs by default */
8530 atts = (const xmlChar **)
8531 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008532 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008533 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008534 if (attvalue != NULL)
8535 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008536 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008537 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008538 ctxt->atts = atts;
8539 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008540 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008541 const xmlChar **n;
8542
Owen Taylor3473f882001-02-23 17:55:21 +00008543 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008544 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008545 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008546 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008547 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008548 if (attvalue != NULL)
8549 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008550 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008551 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008552 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008553 ctxt->atts = atts;
8554 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008555 }
8556 atts[nbatts++] = attname;
8557 atts[nbatts++] = attvalue;
8558 atts[nbatts] = NULL;
8559 atts[nbatts + 1] = NULL;
8560 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008561 if (attvalue != NULL)
8562 xmlFree(attvalue);
8563 }
8564
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008565failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008566
Daniel Veillard3772de32002-12-17 10:31:45 +00008567 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008568 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8569 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008570 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008571 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8572 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008573 }
8574 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008575 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8576 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008577 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8578 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008579 break;
8580 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008581 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008582 GROW;
8583 }
8584
8585 /*
8586 * SAX: Start of Element !
8587 */
8588 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008589 (!ctxt->disableSAX)) {
8590 if (nbatts > 0)
8591 ctxt->sax->startElement(ctxt->userData, name, atts);
8592 else
8593 ctxt->sax->startElement(ctxt->userData, name, NULL);
8594 }
Owen Taylor3473f882001-02-23 17:55:21 +00008595
8596 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008597 /* Free only the content strings */
8598 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008599 if (atts[i] != NULL)
8600 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008601 }
8602 return(name);
8603}
8604
8605/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008606 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008607 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008608 * @line: line of the start tag
8609 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008610 *
8611 * parse an end of tag
8612 *
8613 * [42] ETag ::= '</' Name S? '>'
8614 *
8615 * With namespace
8616 *
8617 * [NS 9] ETag ::= '</' QName S? '>'
8618 */
8619
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008620static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008621xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008622 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008623
8624 GROW;
8625 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008626 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008627 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008628 return;
8629 }
8630 SKIP(2);
8631
Daniel Veillard46de64e2002-05-29 08:21:33 +00008632 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008633
8634 /*
8635 * We should definitely be at the ending "S? '>'" part
8636 */
8637 GROW;
8638 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008639 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008640 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008641 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008642 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008643
8644 /*
8645 * [ WFC: Element Type Match ]
8646 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008647 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008648 *
8649 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008650 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008651 if (name == NULL) name = BAD_CAST "unparseable";
8652 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008653 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008654 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008655 }
8656
8657 /*
8658 * SAX: End of Tag
8659 */
8660 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8661 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008662 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008663
Daniel Veillarde57ec792003-09-10 10:50:59 +00008664 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008665 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008666 return;
8667}
8668
8669/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008670 * xmlParseEndTag:
8671 * @ctxt: an XML parser context
8672 *
8673 * parse an end of tag
8674 *
8675 * [42] ETag ::= '</' Name S? '>'
8676 *
8677 * With namespace
8678 *
8679 * [NS 9] ETag ::= '</' QName S? '>'
8680 */
8681
8682void
8683xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008684 xmlParseEndTag1(ctxt, 0);
8685}
Daniel Veillard81273902003-09-30 00:43:48 +00008686#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008687
8688/************************************************************************
8689 * *
8690 * SAX 2 specific operations *
8691 * *
8692 ************************************************************************/
8693
Daniel Veillard0fb18932003-09-07 09:14:37 +00008694/*
8695 * xmlGetNamespace:
8696 * @ctxt: an XML parser context
8697 * @prefix: the prefix to lookup
8698 *
8699 * Lookup the namespace name for the @prefix (which ca be NULL)
8700 * The prefix must come from the @ctxt->dict dictionnary
8701 *
8702 * Returns the namespace name or NULL if not bound
8703 */
8704static const xmlChar *
8705xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8706 int i;
8707
Daniel Veillarde57ec792003-09-10 10:50:59 +00008708 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008710 if (ctxt->nsTab[i] == prefix) {
8711 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8712 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008713 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008714 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008715 return(NULL);
8716}
8717
8718/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008719 * xmlParseQName:
8720 * @ctxt: an XML parser context
8721 * @prefix: pointer to store the prefix part
8722 *
8723 * parse an XML Namespace QName
8724 *
8725 * [6] QName ::= (Prefix ':')? LocalPart
8726 * [7] Prefix ::= NCName
8727 * [8] LocalPart ::= NCName
8728 *
8729 * Returns the Name parsed or NULL
8730 */
8731
8732static const xmlChar *
8733xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8734 const xmlChar *l, *p;
8735
8736 GROW;
8737
8738 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008739 if (l == NULL) {
8740 if (CUR == ':') {
8741 l = xmlParseName(ctxt);
8742 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008743 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008744 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008745 *prefix = NULL;
8746 return(l);
8747 }
8748 }
8749 return(NULL);
8750 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008751 if (CUR == ':') {
8752 NEXT;
8753 p = l;
8754 l = xmlParseNCName(ctxt);
8755 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008756 xmlChar *tmp;
8757
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008758 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8759 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008760 l = xmlParseNmtoken(ctxt);
8761 if (l == NULL)
8762 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8763 else {
8764 tmp = xmlBuildQName(l, p, NULL, 0);
8765 xmlFree((char *)l);
8766 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008767 p = xmlDictLookup(ctxt->dict, tmp, -1);
8768 if (tmp != NULL) xmlFree(tmp);
8769 *prefix = NULL;
8770 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771 }
8772 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008773 xmlChar *tmp;
8774
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008775 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8776 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008777 NEXT;
8778 tmp = (xmlChar *) xmlParseName(ctxt);
8779 if (tmp != NULL) {
8780 tmp = xmlBuildQName(tmp, l, NULL, 0);
8781 l = xmlDictLookup(ctxt->dict, tmp, -1);
8782 if (tmp != NULL) xmlFree(tmp);
8783 *prefix = p;
8784 return(l);
8785 }
8786 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8787 l = xmlDictLookup(ctxt->dict, tmp, -1);
8788 if (tmp != NULL) xmlFree(tmp);
8789 *prefix = p;
8790 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008791 }
8792 *prefix = p;
8793 } else
8794 *prefix = NULL;
8795 return(l);
8796}
8797
8798/**
8799 * xmlParseQNameAndCompare:
8800 * @ctxt: an XML parser context
8801 * @name: the localname
8802 * @prefix: the prefix, if any.
8803 *
8804 * parse an XML name and compares for match
8805 * (specialized for endtag parsing)
8806 *
8807 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8808 * and the name for mismatch
8809 */
8810
8811static const xmlChar *
8812xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8813 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008814 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 const xmlChar *in;
8816 const xmlChar *ret;
8817 const xmlChar *prefix2;
8818
8819 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8820
8821 GROW;
8822 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008823
Daniel Veillard0fb18932003-09-07 09:14:37 +00008824 cmp = prefix;
8825 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008826 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008827 ++cmp;
8828 }
8829 if ((*cmp == 0) && (*in == ':')) {
8830 in++;
8831 cmp = name;
8832 while (*in != 0 && *in == *cmp) {
8833 ++in;
8834 ++cmp;
8835 }
William M. Brack76e95df2003-10-18 16:20:14 +00008836 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008837 /* success */
8838 ctxt->input->cur = in;
8839 return((const xmlChar*) 1);
8840 }
8841 }
8842 /*
8843 * all strings coms from the dictionary, equality can be done directly
8844 */
8845 ret = xmlParseQName (ctxt, &prefix2);
8846 if ((ret == name) && (prefix == prefix2))
8847 return((const xmlChar*) 1);
8848 return ret;
8849}
8850
8851/**
8852 * xmlParseAttValueInternal:
8853 * @ctxt: an XML parser context
8854 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008855 * @alloc: whether the attribute was reallocated as a new string
8856 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008857 *
8858 * parse a value for an attribute.
8859 * NOTE: if no normalization is needed, the routine will return pointers
8860 * directly from the data buffer.
8861 *
8862 * 3.3.3 Attribute-Value Normalization:
8863 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008864 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008865 * - a character reference is processed by appending the referenced
8866 * character to the attribute value
8867 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008868 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008869 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8870 * appending #x20 to the normalized value, except that only a single
8871 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008872 * parsed entity or the literal entity value of an internal parsed entity
8873 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008874 * If the declared value is not CDATA, then the XML processor must further
8875 * process the normalized attribute value by discarding any leading and
8876 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008877 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008878 * All attributes for which no declaration has been read should be treated
8879 * by a non-validating parser as if declared CDATA.
8880 *
8881 * Returns the AttValue parsed or NULL. The value has to be freed by the
8882 * caller if it was copied, this can be detected by val[*len] == 0.
8883 */
8884
8885static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008886xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8887 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008888{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008890 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008891 xmlChar *ret = NULL;
8892
8893 GROW;
8894 in = (xmlChar *) CUR_PTR;
8895 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008896 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008897 return (NULL);
8898 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008899 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008900
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008901 /*
8902 * try to handle in this routine the most common case where no
8903 * allocation of a new string is required and where content is
8904 * pure ASCII.
8905 */
8906 limit = *in++;
8907 end = ctxt->input->end;
8908 start = in;
8909 if (in >= end) {
8910 const xmlChar *oldbase = ctxt->input->base;
8911 GROW;
8912 if (oldbase != ctxt->input->base) {
8913 long delta = ctxt->input->base - oldbase;
8914 start = start + delta;
8915 in = in + delta;
8916 }
8917 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008919 if (normalize) {
8920 /*
8921 * Skip any leading spaces
8922 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008923 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008924 ((*in == 0x20) || (*in == 0x9) ||
8925 (*in == 0xA) || (*in == 0xD))) {
8926 in++;
8927 start = in;
8928 if (in >= end) {
8929 const xmlChar *oldbase = ctxt->input->base;
8930 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008931 if (ctxt->instate == XML_PARSER_EOF)
8932 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008933 if (oldbase != ctxt->input->base) {
8934 long delta = ctxt->input->base - oldbase;
8935 start = start + delta;
8936 in = in + delta;
8937 }
8938 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008939 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8940 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8941 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008942 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008943 return(NULL);
8944 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008945 }
8946 }
8947 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8948 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8949 if ((*in++ == 0x20) && (*in == 0x20)) break;
8950 if (in >= end) {
8951 const xmlChar *oldbase = ctxt->input->base;
8952 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008953 if (ctxt->instate == XML_PARSER_EOF)
8954 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008955 if (oldbase != ctxt->input->base) {
8956 long delta = ctxt->input->base - oldbase;
8957 start = start + delta;
8958 in = in + delta;
8959 }
8960 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008961 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8962 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8963 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008964 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008965 return(NULL);
8966 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008967 }
8968 }
8969 last = in;
8970 /*
8971 * skip the trailing blanks
8972 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008973 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008974 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008975 ((*in == 0x20) || (*in == 0x9) ||
8976 (*in == 0xA) || (*in == 0xD))) {
8977 in++;
8978 if (in >= end) {
8979 const xmlChar *oldbase = ctxt->input->base;
8980 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008981 if (ctxt->instate == XML_PARSER_EOF)
8982 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008983 if (oldbase != ctxt->input->base) {
8984 long delta = ctxt->input->base - oldbase;
8985 start = start + delta;
8986 in = in + delta;
8987 last = last + delta;
8988 }
8989 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008990 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8991 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8992 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008993 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008994 return(NULL);
8995 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008996 }
8997 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008998 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8999 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9000 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009001 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009002 return(NULL);
9003 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009004 if (*in != limit) goto need_complex;
9005 } else {
9006 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9007 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9008 in++;
9009 if (in >= end) {
9010 const xmlChar *oldbase = ctxt->input->base;
9011 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009012 if (ctxt->instate == XML_PARSER_EOF)
9013 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009014 if (oldbase != ctxt->input->base) {
9015 long delta = ctxt->input->base - oldbase;
9016 start = start + delta;
9017 in = in + delta;
9018 }
9019 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009020 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9021 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9022 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009023 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009024 return(NULL);
9025 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009026 }
9027 }
9028 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009029 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9030 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9031 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009032 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009033 return(NULL);
9034 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009035 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009036 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009037 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009038 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009039 *len = last - start;
9040 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009041 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009042 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009043 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009044 }
9045 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009046 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009047 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009048need_complex:
9049 if (alloc) *alloc = 1;
9050 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009051}
9052
9053/**
9054 * xmlParseAttribute2:
9055 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009056 * @pref: the element prefix
9057 * @elem: the element name
9058 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009059 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009060 * @len: an int * to save the length of the attribute
9061 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009062 *
9063 * parse an attribute in the new SAX2 framework.
9064 *
9065 * Returns the attribute name, and the value in *value, .
9066 */
9067
9068static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009069xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009070 const xmlChar * pref, const xmlChar * elem,
9071 const xmlChar ** prefix, xmlChar ** value,
9072 int *len, int *alloc)
9073{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009074 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009075 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009076 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009077
9078 *value = NULL;
9079 GROW;
9080 name = xmlParseQName(ctxt, prefix);
9081 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009082 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9083 "error parsing attribute name\n");
9084 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009085 }
9086
9087 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009088 * get the type if needed
9089 */
9090 if (ctxt->attsSpecial != NULL) {
9091 int type;
9092
9093 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009094 pref, elem, *prefix, name);
9095 if (type != 0)
9096 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009097 }
9098
9099 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009100 * read the value
9101 */
9102 SKIP_BLANKS;
9103 if (RAW == '=') {
9104 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009105 SKIP_BLANKS;
9106 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9107 if (normalize) {
9108 /*
9109 * Sometimes a second normalisation pass for spaces is needed
9110 * but that only happens if charrefs or entities refernces
9111 * have been used in the attribute value, i.e. the attribute
9112 * value have been extracted in an allocated string already.
9113 */
9114 if (*alloc) {
9115 const xmlChar *val2;
9116
9117 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009118 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009119 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009120 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009121 }
9122 }
9123 }
9124 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009125 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009126 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9127 "Specification mandate value for attribute %s\n",
9128 name);
9129 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009130 }
9131
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009132 if (*prefix == ctxt->str_xml) {
9133 /*
9134 * Check that xml:lang conforms to the specification
9135 * No more registered as an error, just generate a warning now
9136 * since this was deprecated in XML second edition
9137 */
9138 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9139 internal_val = xmlStrndup(val, *len);
9140 if (!xmlCheckLanguageID(internal_val)) {
9141 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9142 "Malformed value for xml:lang : %s\n",
9143 internal_val, NULL);
9144 }
9145 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009146
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009147 /*
9148 * Check that xml:space conforms to the specification
9149 */
9150 if (xmlStrEqual(name, BAD_CAST "space")) {
9151 internal_val = xmlStrndup(val, *len);
9152 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9153 *(ctxt->space) = 0;
9154 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9155 *(ctxt->space) = 1;
9156 else {
9157 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9158 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9159 internal_val, NULL);
9160 }
9161 }
9162 if (internal_val) {
9163 xmlFree(internal_val);
9164 }
9165 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009166
9167 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009168 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009169}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009170/**
9171 * xmlParseStartTag2:
9172 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009173 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009174 * parse a start of tag either for rule element or
9175 * EmptyElement. In both case we don't parse the tag closing chars.
9176 * This routine is called when running SAX2 parsing
9177 *
9178 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9179 *
9180 * [ WFC: Unique Att Spec ]
9181 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009182 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009183 *
9184 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9185 *
9186 * [ WFC: Unique Att Spec ]
9187 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009188 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009189 *
9190 * With namespace:
9191 *
9192 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9193 *
9194 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9195 *
9196 * Returns the element name parsed
9197 */
9198
9199static const xmlChar *
9200xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009201 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009202 const xmlChar *localname;
9203 const xmlChar *prefix;
9204 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009205 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009206 const xmlChar *nsname;
9207 xmlChar *attvalue;
9208 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009209 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009210 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009211 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009212 const xmlChar *base;
9213 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009214 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009215
9216 if (RAW != '<') return(NULL);
9217 NEXT1;
9218
9219 /*
9220 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9221 * point since the attribute values may be stored as pointers to
9222 * the buffer and calling SHRINK would destroy them !
9223 * The Shrinking is only possible once the full set of attribute
9224 * callbacks have been done.
9225 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009226reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009227 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009228 base = ctxt->input->base;
9229 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009230 oldline = ctxt->input->line;
9231 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009232 nbatts = 0;
9233 nratts = 0;
9234 nbdef = 0;
9235 nbNs = 0;
9236 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009237 /* Forget any namespaces added during an earlier parse of this element. */
9238 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009239
9240 localname = xmlParseQName(ctxt, &prefix);
9241 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009242 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9243 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009244 return(NULL);
9245 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009246 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009247
9248 /*
9249 * Now parse the attributes, it ends up with the ending
9250 *
9251 * (S Attribute)* S?
9252 */
9253 SKIP_BLANKS;
9254 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009255 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009256
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009257 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009258 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009259 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009260 const xmlChar *q = CUR_PTR;
9261 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009262 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009263
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009264 attname = xmlParseAttribute2(ctxt, prefix, localname,
9265 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009266 if (ctxt->input->base != base) {
9267 if ((attvalue != NULL) && (alloc != 0))
9268 xmlFree(attvalue);
9269 attvalue = NULL;
9270 goto base_changed;
9271 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009272 if ((attname != NULL) && (attvalue != NULL)) {
9273 if (len < 0) len = xmlStrlen(attvalue);
9274 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009275 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9276 xmlURIPtr uri;
9277
9278 if (*URL != 0) {
9279 uri = xmlParseURI((const char *) URL);
9280 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009281 xmlNsErr(ctxt, XML_WAR_NS_URI,
9282 "xmlns: '%s' is not a valid URI\n",
9283 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009284 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009285 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009286 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9287 "xmlns: URI %s is not absolute\n",
9288 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009289 }
9290 xmlFreeURI(uri);
9291 }
Daniel Veillard37334572008-07-31 08:20:02 +00009292 if (URL == ctxt->str_xml_ns) {
9293 if (attname != ctxt->str_xml) {
9294 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9295 "xml namespace URI cannot be the default namespace\n",
9296 NULL, NULL, NULL);
9297 }
9298 goto skip_default_ns;
9299 }
9300 if ((len == 29) &&
9301 (xmlStrEqual(URL,
9302 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9303 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9304 "reuse of the xmlns namespace name is forbidden\n",
9305 NULL, NULL, NULL);
9306 goto skip_default_ns;
9307 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009308 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009309 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009310 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009311 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009312 for (j = 1;j <= nbNs;j++)
9313 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9314 break;
9315 if (j <= nbNs)
9316 xmlErrAttributeDup(ctxt, NULL, attname);
9317 else
9318 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009319skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009320 if (alloc != 0) xmlFree(attvalue);
9321 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009322 continue;
9323 }
9324 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009325 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9326 xmlURIPtr uri;
9327
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009328 if (attname == ctxt->str_xml) {
9329 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009330 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9331 "xml namespace prefix mapped to wrong URI\n",
9332 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009333 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009334 /*
9335 * Do not keep a namespace definition node
9336 */
Daniel Veillard37334572008-07-31 08:20:02 +00009337 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009338 }
Daniel Veillard37334572008-07-31 08:20:02 +00009339 if (URL == ctxt->str_xml_ns) {
9340 if (attname != ctxt->str_xml) {
9341 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9342 "xml namespace URI mapped to wrong prefix\n",
9343 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009344 }
Daniel Veillard37334572008-07-31 08:20:02 +00009345 goto skip_ns;
9346 }
9347 if (attname == ctxt->str_xmlns) {
9348 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9349 "redefinition of the xmlns prefix is forbidden\n",
9350 NULL, NULL, NULL);
9351 goto skip_ns;
9352 }
9353 if ((len == 29) &&
9354 (xmlStrEqual(URL,
9355 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9356 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9357 "reuse of the xmlns namespace name is forbidden\n",
9358 NULL, NULL, NULL);
9359 goto skip_ns;
9360 }
9361 if ((URL == NULL) || (URL[0] == 0)) {
9362 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9363 "xmlns:%s: Empty XML namespace is not allowed\n",
9364 attname, NULL, NULL);
9365 goto skip_ns;
9366 } else {
9367 uri = xmlParseURI((const char *) URL);
9368 if (uri == NULL) {
9369 xmlNsErr(ctxt, XML_WAR_NS_URI,
9370 "xmlns:%s: '%s' is not a valid URI\n",
9371 attname, URL, NULL);
9372 } else {
9373 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9374 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9375 "xmlns:%s: URI %s is not absolute\n",
9376 attname, URL, NULL);
9377 }
9378 xmlFreeURI(uri);
9379 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009380 }
9381
Daniel Veillard0fb18932003-09-07 09:14:37 +00009382 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009383 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009384 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009385 for (j = 1;j <= nbNs;j++)
9386 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9387 break;
9388 if (j <= nbNs)
9389 xmlErrAttributeDup(ctxt, aprefix, attname);
9390 else
9391 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009392skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009393 if (alloc != 0) xmlFree(attvalue);
9394 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009395 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009396 continue;
9397 }
9398
9399 /*
9400 * Add the pair to atts
9401 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009402 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9403 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009404 if (attvalue[len] == 0)
9405 xmlFree(attvalue);
9406 goto failed;
9407 }
9408 maxatts = ctxt->maxatts;
9409 atts = ctxt->atts;
9410 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009411 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009412 atts[nbatts++] = attname;
9413 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009414 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009415 atts[nbatts++] = attvalue;
9416 attvalue += len;
9417 atts[nbatts++] = attvalue;
9418 /*
9419 * tag if some deallocation is needed
9420 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009421 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009422 } else {
9423 if ((attvalue != NULL) && (attvalue[len] == 0))
9424 xmlFree(attvalue);
9425 }
9426
Daniel Veillard37334572008-07-31 08:20:02 +00009427failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009428
9429 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009430 if (ctxt->instate == XML_PARSER_EOF)
9431 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009432 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009433 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9434 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009435 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009436 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9437 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009438 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009439 }
9440 SKIP_BLANKS;
9441 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9442 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009443 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009444 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009445 break;
9446 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009447 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009448 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009449 }
9450
Daniel Veillard0fb18932003-09-07 09:14:37 +00009451 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009452 * The attributes defaulting
9453 */
9454 if (ctxt->attsDefault != NULL) {
9455 xmlDefAttrsPtr defaults;
9456
9457 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9458 if (defaults != NULL) {
9459 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009460 attname = defaults->values[5 * i];
9461 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009462
9463 /*
9464 * special work for namespaces defaulted defs
9465 */
9466 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9467 /*
9468 * check that it's not a defined namespace
9469 */
9470 for (j = 1;j <= nbNs;j++)
9471 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9472 break;
9473 if (j <= nbNs) continue;
9474
9475 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009476 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009477 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009478 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009479 nbNs++;
9480 }
9481 } else if (aprefix == ctxt->str_xmlns) {
9482 /*
9483 * check that it's not a defined namespace
9484 */
9485 for (j = 1;j <= nbNs;j++)
9486 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9487 break;
9488 if (j <= nbNs) continue;
9489
9490 nsname = xmlGetNamespace(ctxt, attname);
9491 if (nsname != defaults->values[2]) {
9492 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009493 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009494 nbNs++;
9495 }
9496 } else {
9497 /*
9498 * check that it's not a defined attribute
9499 */
9500 for (j = 0;j < nbatts;j+=5) {
9501 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9502 break;
9503 }
9504 if (j < nbatts) continue;
9505
9506 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9507 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009508 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009509 }
9510 maxatts = ctxt->maxatts;
9511 atts = ctxt->atts;
9512 }
9513 atts[nbatts++] = attname;
9514 atts[nbatts++] = aprefix;
9515 if (aprefix == NULL)
9516 atts[nbatts++] = NULL;
9517 else
9518 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009519 atts[nbatts++] = defaults->values[5 * i + 2];
9520 atts[nbatts++] = defaults->values[5 * i + 3];
9521 if ((ctxt->standalone == 1) &&
9522 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009523 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009524 "standalone: attribute %s on %s defaulted from external subset\n",
9525 attname, localname);
9526 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009527 nbdef++;
9528 }
9529 }
9530 }
9531 }
9532
Daniel Veillarde70c8772003-11-25 07:21:18 +00009533 /*
9534 * The attributes checkings
9535 */
9536 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009537 /*
9538 * The default namespace does not apply to attribute names.
9539 */
9540 if (atts[i + 1] != NULL) {
9541 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9542 if (nsname == NULL) {
9543 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9544 "Namespace prefix %s for %s on %s is not defined\n",
9545 atts[i + 1], atts[i], localname);
9546 }
9547 atts[i + 2] = nsname;
9548 } else
9549 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009550 /*
9551 * [ WFC: Unique Att Spec ]
9552 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009553 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009554 * As extended by the Namespace in XML REC.
9555 */
9556 for (j = 0; j < i;j += 5) {
9557 if (atts[i] == atts[j]) {
9558 if (atts[i+1] == atts[j+1]) {
9559 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9560 break;
9561 }
9562 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9563 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9564 "Namespaced Attribute %s in '%s' redefined\n",
9565 atts[i], nsname, NULL);
9566 break;
9567 }
9568 }
9569 }
9570 }
9571
Daniel Veillarde57ec792003-09-10 10:50:59 +00009572 nsname = xmlGetNamespace(ctxt, prefix);
9573 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009574 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9575 "Namespace prefix %s on %s is not defined\n",
9576 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009577 }
9578 *pref = prefix;
9579 *URI = nsname;
9580
9581 /*
9582 * SAX: Start of Element !
9583 */
9584 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9585 (!ctxt->disableSAX)) {
9586 if (nbNs > 0)
9587 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9588 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9589 nbatts / 5, nbdef, atts);
9590 else
9591 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9592 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9593 }
9594
9595 /*
9596 * Free up attribute allocated strings if needed
9597 */
9598 if (attval != 0) {
9599 for (i = 3,j = 0; j < nratts;i += 5,j++)
9600 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9601 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009602 }
9603
9604 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009605
9606base_changed:
9607 /*
9608 * the attribute strings are valid iif the base didn't changed
9609 */
9610 if (attval != 0) {
9611 for (i = 3,j = 0; j < nratts;i += 5,j++)
9612 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9613 xmlFree((xmlChar *) atts[i]);
9614 }
9615 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009616 ctxt->input->line = oldline;
9617 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009618 if (ctxt->wellFormed == 1) {
9619 goto reparse;
9620 }
9621 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009622}
9623
9624/**
9625 * xmlParseEndTag2:
9626 * @ctxt: an XML parser context
9627 * @line: line of the start tag
9628 * @nsNr: number of namespaces on the start tag
9629 *
9630 * parse an end of tag
9631 *
9632 * [42] ETag ::= '</' Name S? '>'
9633 *
9634 * With namespace
9635 *
9636 * [NS 9] ETag ::= '</' QName S? '>'
9637 */
9638
9639static void
9640xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009641 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009642 const xmlChar *name;
9643
9644 GROW;
9645 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009646 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009647 return;
9648 }
9649 SKIP(2);
9650
William M. Brack13dfa872004-09-18 04:52:08 +00009651 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009652 if (ctxt->input->cur[tlen] == '>') {
9653 ctxt->input->cur += tlen + 1;
9654 goto done;
9655 }
9656 ctxt->input->cur += tlen;
9657 name = (xmlChar*)1;
9658 } else {
9659 if (prefix == NULL)
9660 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9661 else
9662 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9663 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009664
9665 /*
9666 * We should definitely be at the ending "S? '>'" part
9667 */
9668 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009669 if (ctxt->instate == XML_PARSER_EOF)
9670 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009671 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009672 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009673 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009674 } else
9675 NEXT1;
9676
9677 /*
9678 * [ WFC: Element Type Match ]
9679 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009680 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009681 *
9682 */
9683 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009684 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009685 if ((line == 0) && (ctxt->node != NULL))
9686 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009687 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009688 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009689 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009690 }
9691
9692 /*
9693 * SAX: End of Tag
9694 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009695done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009696 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9697 (!ctxt->disableSAX))
9698 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9699
Daniel Veillard0fb18932003-09-07 09:14:37 +00009700 spacePop(ctxt);
9701 if (nsNr != 0)
9702 nsPop(ctxt, nsNr);
9703 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009704}
9705
9706/**
Owen Taylor3473f882001-02-23 17:55:21 +00009707 * xmlParseCDSect:
9708 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009709 *
Owen Taylor3473f882001-02-23 17:55:21 +00009710 * Parse escaped pure raw content.
9711 *
9712 * [18] CDSect ::= CDStart CData CDEnd
9713 *
9714 * [19] CDStart ::= '<![CDATA['
9715 *
9716 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9717 *
9718 * [21] CDEnd ::= ']]>'
9719 */
9720void
9721xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9722 xmlChar *buf = NULL;
9723 int len = 0;
9724 int size = XML_PARSER_BUFFER_SIZE;
9725 int r, rl;
9726 int s, sl;
9727 int cur, l;
9728 int count = 0;
9729
Daniel Veillard8f597c32003-10-06 08:19:27 +00009730 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009731 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009732 SKIP(9);
9733 } else
9734 return;
9735
9736 ctxt->instate = XML_PARSER_CDATA_SECTION;
9737 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009738 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009739 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009740 ctxt->instate = XML_PARSER_CONTENT;
9741 return;
9742 }
9743 NEXTL(rl);
9744 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009745 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009746 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009747 ctxt->instate = XML_PARSER_CONTENT;
9748 return;
9749 }
9750 NEXTL(sl);
9751 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009752 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009753 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009754 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009755 return;
9756 }
William M. Brack871611b2003-10-18 04:53:14 +00009757 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009758 ((r != ']') || (s != ']') || (cur != '>'))) {
9759 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009760 xmlChar *tmp;
9761
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009762 if ((size > XML_MAX_TEXT_LENGTH) &&
9763 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9764 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9765 "CData section too big found", NULL);
9766 xmlFree (buf);
9767 return;
9768 }
9769 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009770 if (tmp == NULL) {
9771 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009772 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009773 return;
9774 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009775 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009776 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009777 }
9778 COPY_BUF(rl,buf,len,r);
9779 r = s;
9780 rl = sl;
9781 s = cur;
9782 sl = l;
9783 count++;
9784 if (count > 50) {
9785 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009786 if (ctxt->instate == XML_PARSER_EOF) {
9787 xmlFree(buf);
9788 return;
9789 }
Owen Taylor3473f882001-02-23 17:55:21 +00009790 count = 0;
9791 }
9792 NEXTL(l);
9793 cur = CUR_CHAR(l);
9794 }
9795 buf[len] = 0;
9796 ctxt->instate = XML_PARSER_CONTENT;
9797 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009798 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009799 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009800 xmlFree(buf);
9801 return;
9802 }
9803 NEXTL(l);
9804
9805 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009806 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009807 */
9808 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9809 if (ctxt->sax->cdataBlock != NULL)
9810 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009811 else if (ctxt->sax->characters != NULL)
9812 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009813 }
9814 xmlFree(buf);
9815}
9816
9817/**
9818 * xmlParseContent:
9819 * @ctxt: an XML parser context
9820 *
9821 * Parse a content:
9822 *
9823 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9824 */
9825
9826void
9827xmlParseContent(xmlParserCtxtPtr ctxt) {
9828 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009829 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009830 ((RAW != '<') || (NXT(1) != '/')) &&
9831 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009832 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009833 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009834 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009835
9836 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009837 * First case : a Processing Instruction.
9838 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009839 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009840 xmlParsePI(ctxt);
9841 }
9842
9843 /*
9844 * Second case : a CDSection
9845 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009846 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009847 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009848 xmlParseCDSect(ctxt);
9849 }
9850
9851 /*
9852 * Third case : a comment
9853 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009854 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009855 (NXT(2) == '-') && (NXT(3) == '-')) {
9856 xmlParseComment(ctxt);
9857 ctxt->instate = XML_PARSER_CONTENT;
9858 }
9859
9860 /*
9861 * Fourth case : a sub-element.
9862 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009863 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009864 xmlParseElement(ctxt);
9865 }
9866
9867 /*
9868 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009869 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009870 */
9871
Daniel Veillard21a0f912001-02-25 19:54:14 +00009872 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009873 xmlParseReference(ctxt);
9874 }
9875
9876 /*
9877 * Last case, text. Note that References are handled directly.
9878 */
9879 else {
9880 xmlParseCharData(ctxt, 0);
9881 }
9882
9883 GROW;
9884 /*
9885 * Pop-up of finished entities.
9886 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009887 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009888 xmlPopInput(ctxt);
9889 SHRINK;
9890
Daniel Veillardfdc91562002-07-01 21:52:03 +00009891 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009892 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9893 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009894 ctxt->instate = XML_PARSER_EOF;
9895 break;
9896 }
9897 }
9898}
9899
9900/**
9901 * xmlParseElement:
9902 * @ctxt: an XML parser context
9903 *
9904 * parse an XML element, this is highly recursive
9905 *
9906 * [39] element ::= EmptyElemTag | STag content ETag
9907 *
9908 * [ WFC: Element Type Match ]
9909 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009910 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009911 *
Owen Taylor3473f882001-02-23 17:55:21 +00009912 */
9913
9914void
9915xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009916 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009917 const xmlChar *prefix = NULL;
9918 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009919 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009920 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009921 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009922 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009923
Daniel Veillard8915c152008-08-26 13:05:34 +00009924 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9925 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9926 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9927 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9928 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009929 ctxt->instate = XML_PARSER_EOF;
9930 return;
9931 }
9932
Owen Taylor3473f882001-02-23 17:55:21 +00009933 /* Capture start position */
9934 if (ctxt->record_info) {
9935 node_info.begin_pos = ctxt->input->consumed +
9936 (CUR_PTR - ctxt->input->base);
9937 node_info.begin_line = ctxt->input->line;
9938 }
9939
9940 if (ctxt->spaceNr == 0)
9941 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009942 else if (*ctxt->space == -2)
9943 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009944 else
9945 spacePush(ctxt, *ctxt->space);
9946
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009947 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009948#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009949 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009950#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009951 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009952#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009953 else
9954 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009955#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009956 if (ctxt->instate == XML_PARSER_EOF)
9957 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009958 if (name == NULL) {
9959 spacePop(ctxt);
9960 return;
9961 }
9962 namePush(ctxt, name);
9963 ret = ctxt->node;
9964
Daniel Veillard4432df22003-09-28 18:58:27 +00009965#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009966 /*
9967 * [ VC: Root Element Type ]
9968 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009969 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009970 */
9971 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9972 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9973 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009974#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009975
9976 /*
9977 * Check for an Empty Element.
9978 */
9979 if ((RAW == '/') && (NXT(1) == '>')) {
9980 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009981 if (ctxt->sax2) {
9982 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9983 (!ctxt->disableSAX))
9984 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009985#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009986 } else {
9987 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9988 (!ctxt->disableSAX))
9989 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009990#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009991 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009992 namePop(ctxt);
9993 spacePop(ctxt);
9994 if (nsNr != ctxt->nsNr)
9995 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009996 if ( ret != NULL && ctxt->record_info ) {
9997 node_info.end_pos = ctxt->input->consumed +
9998 (CUR_PTR - ctxt->input->base);
9999 node_info.end_line = ctxt->input->line;
10000 node_info.node = ret;
10001 xmlParserAddNodeInfo(ctxt, &node_info);
10002 }
10003 return;
10004 }
10005 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010006 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010007 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010008 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10009 "Couldn't find end of Start Tag %s line %d\n",
10010 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010011
10012 /*
10013 * end of parsing of this node.
10014 */
10015 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010016 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010017 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010018 if (nsNr != ctxt->nsNr)
10019 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010020
10021 /*
10022 * Capture end position and add node
10023 */
10024 if ( ret != NULL && ctxt->record_info ) {
10025 node_info.end_pos = ctxt->input->consumed +
10026 (CUR_PTR - ctxt->input->base);
10027 node_info.end_line = ctxt->input->line;
10028 node_info.node = ret;
10029 xmlParserAddNodeInfo(ctxt, &node_info);
10030 }
10031 return;
10032 }
10033
10034 /*
10035 * Parse the content of the element:
10036 */
10037 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +000010038 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010039 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010040 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010041 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010042
10043 /*
10044 * end of parsing of this node.
10045 */
10046 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010047 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010048 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010049 if (nsNr != ctxt->nsNr)
10050 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010051 return;
10052 }
10053
10054 /*
10055 * parse the end of tag: '</' should be here.
10056 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010057 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010058 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010059 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010060 }
10061#ifdef LIBXML_SAX1_ENABLED
10062 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010063 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010064#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010065
10066 /*
10067 * Capture end position and add node
10068 */
10069 if ( ret != NULL && ctxt->record_info ) {
10070 node_info.end_pos = ctxt->input->consumed +
10071 (CUR_PTR - ctxt->input->base);
10072 node_info.end_line = ctxt->input->line;
10073 node_info.node = ret;
10074 xmlParserAddNodeInfo(ctxt, &node_info);
10075 }
10076}
10077
10078/**
10079 * xmlParseVersionNum:
10080 * @ctxt: an XML parser context
10081 *
10082 * parse the XML version value.
10083 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010084 * [26] VersionNum ::= '1.' [0-9]+
10085 *
10086 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010087 *
10088 * Returns the string giving the XML version number, or NULL
10089 */
10090xmlChar *
10091xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10092 xmlChar *buf = NULL;
10093 int len = 0;
10094 int size = 10;
10095 xmlChar cur;
10096
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010097 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010098 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010099 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010100 return(NULL);
10101 }
10102 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010103 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010104 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010105 return(NULL);
10106 }
10107 buf[len++] = cur;
10108 NEXT;
10109 cur=CUR;
10110 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010111 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010112 return(NULL);
10113 }
10114 buf[len++] = cur;
10115 NEXT;
10116 cur=CUR;
10117 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010118 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010119 xmlChar *tmp;
10120
Owen Taylor3473f882001-02-23 17:55:21 +000010121 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010122 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10123 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010124 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010125 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010126 return(NULL);
10127 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010128 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010129 }
10130 buf[len++] = cur;
10131 NEXT;
10132 cur=CUR;
10133 }
10134 buf[len] = 0;
10135 return(buf);
10136}
10137
10138/**
10139 * xmlParseVersionInfo:
10140 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010141 *
Owen Taylor3473f882001-02-23 17:55:21 +000010142 * parse the XML version.
10143 *
10144 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010145 *
Owen Taylor3473f882001-02-23 17:55:21 +000010146 * [25] Eq ::= S? '=' S?
10147 *
10148 * Returns the version string, e.g. "1.0"
10149 */
10150
10151xmlChar *
10152xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10153 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010154
Daniel Veillarda07050d2003-10-19 14:46:32 +000010155 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010156 SKIP(7);
10157 SKIP_BLANKS;
10158 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010159 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010160 return(NULL);
10161 }
10162 NEXT;
10163 SKIP_BLANKS;
10164 if (RAW == '"') {
10165 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010166 version = xmlParseVersionNum(ctxt);
10167 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010168 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010169 } else
10170 NEXT;
10171 } else if (RAW == '\''){
10172 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010173 version = xmlParseVersionNum(ctxt);
10174 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010175 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010176 } else
10177 NEXT;
10178 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010179 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010180 }
10181 }
10182 return(version);
10183}
10184
10185/**
10186 * xmlParseEncName:
10187 * @ctxt: an XML parser context
10188 *
10189 * parse the XML encoding name
10190 *
10191 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10192 *
10193 * Returns the encoding name value or NULL
10194 */
10195xmlChar *
10196xmlParseEncName(xmlParserCtxtPtr ctxt) {
10197 xmlChar *buf = NULL;
10198 int len = 0;
10199 int size = 10;
10200 xmlChar cur;
10201
10202 cur = CUR;
10203 if (((cur >= 'a') && (cur <= 'z')) ||
10204 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010205 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010206 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010207 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010208 return(NULL);
10209 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010210
Owen Taylor3473f882001-02-23 17:55:21 +000010211 buf[len++] = cur;
10212 NEXT;
10213 cur = CUR;
10214 while (((cur >= 'a') && (cur <= 'z')) ||
10215 ((cur >= 'A') && (cur <= 'Z')) ||
10216 ((cur >= '0') && (cur <= '9')) ||
10217 (cur == '.') || (cur == '_') ||
10218 (cur == '-')) {
10219 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010220 xmlChar *tmp;
10221
Owen Taylor3473f882001-02-23 17:55:21 +000010222 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010223 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10224 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010225 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010226 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010227 return(NULL);
10228 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010229 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010230 }
10231 buf[len++] = cur;
10232 NEXT;
10233 cur = CUR;
10234 if (cur == 0) {
10235 SHRINK;
10236 GROW;
10237 cur = CUR;
10238 }
10239 }
10240 buf[len] = 0;
10241 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010242 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010243 }
10244 return(buf);
10245}
10246
10247/**
10248 * xmlParseEncodingDecl:
10249 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010250 *
Owen Taylor3473f882001-02-23 17:55:21 +000010251 * parse the XML encoding declaration
10252 *
10253 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10254 *
10255 * this setups the conversion filters.
10256 *
10257 * Returns the encoding value or NULL
10258 */
10259
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010260const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010261xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10262 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010263
10264 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010265 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010266 SKIP(8);
10267 SKIP_BLANKS;
10268 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010269 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010270 return(NULL);
10271 }
10272 NEXT;
10273 SKIP_BLANKS;
10274 if (RAW == '"') {
10275 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010276 encoding = xmlParseEncName(ctxt);
10277 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010278 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010279 } else
10280 NEXT;
10281 } else if (RAW == '\''){
10282 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010283 encoding = xmlParseEncName(ctxt);
10284 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010285 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010286 } else
10287 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010288 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010289 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010290 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010291
10292 /*
10293 * Non standard parsing, allowing the user to ignore encoding
10294 */
10295 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10296 return(encoding);
10297
Daniel Veillard6b621b82003-08-11 15:03:34 +000010298 /*
10299 * UTF-16 encoding stwich has already taken place at this stage,
10300 * more over the little-endian/big-endian selection is already done
10301 */
10302 if ((encoding != NULL) &&
10303 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10304 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010305 /*
10306 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010307 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010308 * document is apparently UTF-8 compatible, then raise an
10309 * encoding mismatch fatal error
10310 */
10311 if ((ctxt->encoding == NULL) &&
10312 (ctxt->input->buf != NULL) &&
10313 (ctxt->input->buf->encoder == NULL)) {
10314 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10315 "Document labelled UTF-16 but has UTF-8 content\n");
10316 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010317 if (ctxt->encoding != NULL)
10318 xmlFree((xmlChar *) ctxt->encoding);
10319 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010320 }
10321 /*
10322 * UTF-8 encoding is handled natively
10323 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010324 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010325 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10326 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010327 if (ctxt->encoding != NULL)
10328 xmlFree((xmlChar *) ctxt->encoding);
10329 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010330 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010331 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010332 xmlCharEncodingHandlerPtr handler;
10333
10334 if (ctxt->input->encoding != NULL)
10335 xmlFree((xmlChar *) ctxt->input->encoding);
10336 ctxt->input->encoding = encoding;
10337
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010338 handler = xmlFindCharEncodingHandler((const char *) encoding);
10339 if (handler != NULL) {
10340 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010341 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010342 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010343 "Unsupported encoding %s\n", encoding);
10344 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010345 }
10346 }
10347 }
10348 return(encoding);
10349}
10350
10351/**
10352 * xmlParseSDDecl:
10353 * @ctxt: an XML parser context
10354 *
10355 * parse the XML standalone declaration
10356 *
10357 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010358 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010359 *
10360 * [ VC: Standalone Document Declaration ]
10361 * TODO The standalone document declaration must have the value "no"
10362 * if any external markup declarations contain declarations of:
10363 * - attributes with default values, if elements to which these
10364 * attributes apply appear in the document without specifications
10365 * of values for these attributes, or
10366 * - entities (other than amp, lt, gt, apos, quot), if references
10367 * to those entities appear in the document, or
10368 * - attributes with values subject to normalization, where the
10369 * attribute appears in the document with a value which will change
10370 * as a result of normalization, or
10371 * - element types with element content, if white space occurs directly
10372 * within any instance of those types.
10373 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010374 * Returns:
10375 * 1 if standalone="yes"
10376 * 0 if standalone="no"
10377 * -2 if standalone attribute is missing or invalid
10378 * (A standalone value of -2 means that the XML declaration was found,
10379 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010380 */
10381
10382int
10383xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010384 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010385
10386 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010387 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010388 SKIP(10);
10389 SKIP_BLANKS;
10390 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010391 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010392 return(standalone);
10393 }
10394 NEXT;
10395 SKIP_BLANKS;
10396 if (RAW == '\''){
10397 NEXT;
10398 if ((RAW == 'n') && (NXT(1) == 'o')) {
10399 standalone = 0;
10400 SKIP(2);
10401 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10402 (NXT(2) == 's')) {
10403 standalone = 1;
10404 SKIP(3);
10405 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010406 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010407 }
10408 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010409 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010410 } else
10411 NEXT;
10412 } else if (RAW == '"'){
10413 NEXT;
10414 if ((RAW == 'n') && (NXT(1) == 'o')) {
10415 standalone = 0;
10416 SKIP(2);
10417 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10418 (NXT(2) == 's')) {
10419 standalone = 1;
10420 SKIP(3);
10421 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010422 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010423 }
10424 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010425 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010426 } else
10427 NEXT;
10428 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010429 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010430 }
10431 }
10432 return(standalone);
10433}
10434
10435/**
10436 * xmlParseXMLDecl:
10437 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010438 *
Owen Taylor3473f882001-02-23 17:55:21 +000010439 * parse an XML declaration header
10440 *
10441 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10442 */
10443
10444void
10445xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10446 xmlChar *version;
10447
10448 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010449 * This value for standalone indicates that the document has an
10450 * XML declaration but it does not have a standalone attribute.
10451 * It will be overwritten later if a standalone attribute is found.
10452 */
10453 ctxt->input->standalone = -2;
10454
10455 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010456 * We know that '<?xml' is here.
10457 */
10458 SKIP(5);
10459
William M. Brack76e95df2003-10-18 16:20:14 +000010460 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010461 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10462 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010463 }
10464 SKIP_BLANKS;
10465
10466 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010467 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010468 */
10469 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010470 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010471 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010472 } else {
10473 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10474 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010475 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010476 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010477 if (ctxt->options & XML_PARSE_OLD10) {
10478 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10479 "Unsupported version '%s'\n",
10480 version);
10481 } else {
10482 if ((version[0] == '1') && ((version[1] == '.'))) {
10483 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10484 "Unsupported version '%s'\n",
10485 version, NULL);
10486 } else {
10487 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10488 "Unsupported version '%s'\n",
10489 version);
10490 }
10491 }
Daniel Veillard19840942001-11-29 16:11:38 +000010492 }
10493 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010494 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010495 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010496 }
Owen Taylor3473f882001-02-23 17:55:21 +000010497
10498 /*
10499 * We may have the encoding declaration
10500 */
William M. Brack76e95df2003-10-18 16:20:14 +000010501 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010502 if ((RAW == '?') && (NXT(1) == '>')) {
10503 SKIP(2);
10504 return;
10505 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010506 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010507 }
10508 xmlParseEncodingDecl(ctxt);
10509 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10510 /*
10511 * The XML REC instructs us to stop parsing right here
10512 */
10513 return;
10514 }
10515
10516 /*
10517 * We may have the standalone status.
10518 */
William M. Brack76e95df2003-10-18 16:20:14 +000010519 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010520 if ((RAW == '?') && (NXT(1) == '>')) {
10521 SKIP(2);
10522 return;
10523 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010524 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010525 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010526
10527 /*
10528 * We can grow the input buffer freely at that point
10529 */
10530 GROW;
10531
Owen Taylor3473f882001-02-23 17:55:21 +000010532 SKIP_BLANKS;
10533 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10534
10535 SKIP_BLANKS;
10536 if ((RAW == '?') && (NXT(1) == '>')) {
10537 SKIP(2);
10538 } else if (RAW == '>') {
10539 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010540 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010541 NEXT;
10542 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010543 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010544 MOVETO_ENDTAG(CUR_PTR);
10545 NEXT;
10546 }
10547}
10548
10549/**
10550 * xmlParseMisc:
10551 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010552 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010553 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010554 *
10555 * [27] Misc ::= Comment | PI | S
10556 */
10557
10558void
10559xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010560 while ((ctxt->instate != XML_PARSER_EOF) &&
10561 (((RAW == '<') && (NXT(1) == '?')) ||
10562 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10563 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010564 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010565 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010566 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010567 NEXT;
10568 } else
10569 xmlParseComment(ctxt);
10570 }
10571}
10572
10573/**
10574 * xmlParseDocument:
10575 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010576 *
Owen Taylor3473f882001-02-23 17:55:21 +000010577 * parse an XML document (and build a tree if using the standard SAX
10578 * interface).
10579 *
10580 * [1] document ::= prolog element Misc*
10581 *
10582 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10583 *
10584 * Returns 0, -1 in case of error. the parser context is augmented
10585 * as a result of the parsing.
10586 */
10587
10588int
10589xmlParseDocument(xmlParserCtxtPtr ctxt) {
10590 xmlChar start[4];
10591 xmlCharEncoding enc;
10592
10593 xmlInitParser();
10594
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010595 if ((ctxt == NULL) || (ctxt->input == NULL))
10596 return(-1);
10597
Owen Taylor3473f882001-02-23 17:55:21 +000010598 GROW;
10599
10600 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010601 * SAX: detecting the level.
10602 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010603 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010604
10605 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010606 * SAX: beginning of the document processing.
10607 */
10608 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10609 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10610
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010611 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010612 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010613 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010614 * Get the 4 first bytes and decode the charset
10615 * if enc != XML_CHAR_ENCODING_NONE
10616 * plug some encoding conversion routines.
10617 */
10618 start[0] = RAW;
10619 start[1] = NXT(1);
10620 start[2] = NXT(2);
10621 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010622 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010623 if (enc != XML_CHAR_ENCODING_NONE) {
10624 xmlSwitchEncoding(ctxt, enc);
10625 }
Owen Taylor3473f882001-02-23 17:55:21 +000010626 }
10627
10628
10629 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010630 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010631 }
10632
10633 /*
10634 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010635 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010636 * than just the first line, unless the amount of data is really
10637 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010638 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010639 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10640 GROW;
10641 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010642 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010643
10644 /*
10645 * Note that we will switch encoding on the fly.
10646 */
10647 xmlParseXMLDecl(ctxt);
10648 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10649 /*
10650 * The XML REC instructs us to stop parsing right here
10651 */
10652 return(-1);
10653 }
10654 ctxt->standalone = ctxt->input->standalone;
10655 SKIP_BLANKS;
10656 } else {
10657 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10658 }
10659 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10660 ctxt->sax->startDocument(ctxt->userData);
10661
10662 /*
10663 * The Misc part of the Prolog
10664 */
10665 GROW;
10666 xmlParseMisc(ctxt);
10667
10668 /*
10669 * Then possibly doc type declaration(s) and more Misc
10670 * (doctypedecl Misc*)?
10671 */
10672 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010673 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010674
10675 ctxt->inSubset = 1;
10676 xmlParseDocTypeDecl(ctxt);
10677 if (RAW == '[') {
10678 ctxt->instate = XML_PARSER_DTD;
10679 xmlParseInternalSubset(ctxt);
10680 }
10681
10682 /*
10683 * Create and update the external subset.
10684 */
10685 ctxt->inSubset = 2;
10686 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10687 (!ctxt->disableSAX))
10688 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10689 ctxt->extSubSystem, ctxt->extSubURI);
10690 ctxt->inSubset = 0;
10691
Daniel Veillardac4118d2008-01-11 05:27:32 +000010692 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010693
10694 ctxt->instate = XML_PARSER_PROLOG;
10695 xmlParseMisc(ctxt);
10696 }
10697
10698 /*
10699 * Time to start parsing the tree itself
10700 */
10701 GROW;
10702 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010703 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10704 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010705 } else {
10706 ctxt->instate = XML_PARSER_CONTENT;
10707 xmlParseElement(ctxt);
10708 ctxt->instate = XML_PARSER_EPILOG;
10709
10710
10711 /*
10712 * The Misc part at the end
10713 */
10714 xmlParseMisc(ctxt);
10715
Daniel Veillard561b7f82002-03-20 21:55:57 +000010716 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010717 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010718 }
10719 ctxt->instate = XML_PARSER_EOF;
10720 }
10721
10722 /*
10723 * SAX: end of the document processing.
10724 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010725 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010726 ctxt->sax->endDocument(ctxt->userData);
10727
Daniel Veillard5997aca2002-03-18 18:36:20 +000010728 /*
10729 * Remove locally kept entity definitions if the tree was not built
10730 */
10731 if ((ctxt->myDoc != NULL) &&
10732 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10733 xmlFreeDoc(ctxt->myDoc);
10734 ctxt->myDoc = NULL;
10735 }
10736
Daniel Veillardae0765b2008-07-31 19:54:59 +000010737 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10738 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10739 if (ctxt->valid)
10740 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10741 if (ctxt->nsWellFormed)
10742 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10743 if (ctxt->options & XML_PARSE_OLD10)
10744 ctxt->myDoc->properties |= XML_DOC_OLD10;
10745 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010746 if (! ctxt->wellFormed) {
10747 ctxt->valid = 0;
10748 return(-1);
10749 }
Owen Taylor3473f882001-02-23 17:55:21 +000010750 return(0);
10751}
10752
10753/**
10754 * xmlParseExtParsedEnt:
10755 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010756 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010757 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010758 * An external general parsed entity is well-formed if it matches the
10759 * production labeled extParsedEnt.
10760 *
10761 * [78] extParsedEnt ::= TextDecl? content
10762 *
10763 * Returns 0, -1 in case of error. the parser context is augmented
10764 * as a result of the parsing.
10765 */
10766
10767int
10768xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10769 xmlChar start[4];
10770 xmlCharEncoding enc;
10771
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010772 if ((ctxt == NULL) || (ctxt->input == NULL))
10773 return(-1);
10774
Owen Taylor3473f882001-02-23 17:55:21 +000010775 xmlDefaultSAXHandlerInit();
10776
Daniel Veillard309f81d2003-09-23 09:02:53 +000010777 xmlDetectSAX2(ctxt);
10778
Owen Taylor3473f882001-02-23 17:55:21 +000010779 GROW;
10780
10781 /*
10782 * SAX: beginning of the document processing.
10783 */
10784 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10785 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10786
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010787 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010788 * Get the 4 first bytes and decode the charset
10789 * if enc != XML_CHAR_ENCODING_NONE
10790 * plug some encoding conversion routines.
10791 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010792 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10793 start[0] = RAW;
10794 start[1] = NXT(1);
10795 start[2] = NXT(2);
10796 start[3] = NXT(3);
10797 enc = xmlDetectCharEncoding(start, 4);
10798 if (enc != XML_CHAR_ENCODING_NONE) {
10799 xmlSwitchEncoding(ctxt, enc);
10800 }
Owen Taylor3473f882001-02-23 17:55:21 +000010801 }
10802
10803
10804 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010805 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010806 }
10807
10808 /*
10809 * Check for the XMLDecl in the Prolog.
10810 */
10811 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010812 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010813
10814 /*
10815 * Note that we will switch encoding on the fly.
10816 */
10817 xmlParseXMLDecl(ctxt);
10818 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10819 /*
10820 * The XML REC instructs us to stop parsing right here
10821 */
10822 return(-1);
10823 }
10824 SKIP_BLANKS;
10825 } else {
10826 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10827 }
10828 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10829 ctxt->sax->startDocument(ctxt->userData);
10830
10831 /*
10832 * Doing validity checking on chunk doesn't make sense
10833 */
10834 ctxt->instate = XML_PARSER_CONTENT;
10835 ctxt->validate = 0;
10836 ctxt->loadsubset = 0;
10837 ctxt->depth = 0;
10838
10839 xmlParseContent(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010840
Owen Taylor3473f882001-02-23 17:55:21 +000010841 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010842 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010843 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010844 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010845 }
10846
10847 /*
10848 * SAX: end of the document processing.
10849 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010850 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010851 ctxt->sax->endDocument(ctxt->userData);
10852
10853 if (! ctxt->wellFormed) return(-1);
10854 return(0);
10855}
10856
Daniel Veillard73b013f2003-09-30 12:36:01 +000010857#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010858/************************************************************************
10859 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010860 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010861 * *
10862 ************************************************************************/
10863
10864/**
10865 * xmlParseLookupSequence:
10866 * @ctxt: an XML parser context
10867 * @first: the first char to lookup
10868 * @next: the next char to lookup or zero
10869 * @third: the next char to lookup or zero
10870 *
10871 * Try to find if a sequence (first, next, third) or just (first next) or
10872 * (first) is available in the input stream.
10873 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10874 * to avoid rescanning sequences of bytes, it DOES change the state of the
10875 * parser, do not use liberally.
10876 *
10877 * Returns the index to the current parsing point if the full sequence
10878 * is available, -1 otherwise.
10879 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010880static int
Owen Taylor3473f882001-02-23 17:55:21 +000010881xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10882 xmlChar next, xmlChar third) {
10883 int base, len;
10884 xmlParserInputPtr in;
10885 const xmlChar *buf;
10886
10887 in = ctxt->input;
10888 if (in == NULL) return(-1);
10889 base = in->cur - in->base;
10890 if (base < 0) return(-1);
10891 if (ctxt->checkIndex > base)
10892 base = ctxt->checkIndex;
10893 if (in->buf == NULL) {
10894 buf = in->base;
10895 len = in->length;
10896 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010897 buf = xmlBufContent(in->buf->buffer);
10898 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010899 }
10900 /* take into account the sequence length */
10901 if (third) len -= 2;
10902 else if (next) len --;
10903 for (;base < len;base++) {
10904 if (buf[base] == first) {
10905 if (third != 0) {
10906 if ((buf[base + 1] != next) ||
10907 (buf[base + 2] != third)) continue;
10908 } else if (next != 0) {
10909 if (buf[base + 1] != next) continue;
10910 }
10911 ctxt->checkIndex = 0;
10912#ifdef DEBUG_PUSH
10913 if (next == 0)
10914 xmlGenericError(xmlGenericErrorContext,
10915 "PP: lookup '%c' found at %d\n",
10916 first, base);
10917 else if (third == 0)
10918 xmlGenericError(xmlGenericErrorContext,
10919 "PP: lookup '%c%c' found at %d\n",
10920 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010921 else
Owen Taylor3473f882001-02-23 17:55:21 +000010922 xmlGenericError(xmlGenericErrorContext,
10923 "PP: lookup '%c%c%c' found at %d\n",
10924 first, next, third, base);
10925#endif
10926 return(base - (in->cur - in->base));
10927 }
10928 }
10929 ctxt->checkIndex = base;
10930#ifdef DEBUG_PUSH
10931 if (next == 0)
10932 xmlGenericError(xmlGenericErrorContext,
10933 "PP: lookup '%c' failed\n", first);
10934 else if (third == 0)
10935 xmlGenericError(xmlGenericErrorContext,
10936 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010937 else
Owen Taylor3473f882001-02-23 17:55:21 +000010938 xmlGenericError(xmlGenericErrorContext,
10939 "PP: lookup '%c%c%c' failed\n", first, next, third);
10940#endif
10941 return(-1);
10942}
10943
10944/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010945 * xmlParseGetLasts:
10946 * @ctxt: an XML parser context
10947 * @lastlt: pointer to store the last '<' from the input
10948 * @lastgt: pointer to store the last '>' from the input
10949 *
10950 * Lookup the last < and > in the current chunk
10951 */
10952static void
10953xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10954 const xmlChar **lastgt) {
10955 const xmlChar *tmp;
10956
10957 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10958 xmlGenericError(xmlGenericErrorContext,
10959 "Internal error: xmlParseGetLasts\n");
10960 return;
10961 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010962 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010963 tmp = ctxt->input->end;
10964 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010965 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010966 if (tmp < ctxt->input->base) {
10967 *lastlt = NULL;
10968 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010969 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010970 *lastlt = tmp;
10971 tmp++;
10972 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10973 if (*tmp == '\'') {
10974 tmp++;
10975 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10976 if (tmp < ctxt->input->end) tmp++;
10977 } else if (*tmp == '"') {
10978 tmp++;
10979 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10980 if (tmp < ctxt->input->end) tmp++;
10981 } else
10982 tmp++;
10983 }
10984 if (tmp < ctxt->input->end)
10985 *lastgt = tmp;
10986 else {
10987 tmp = *lastlt;
10988 tmp--;
10989 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10990 if (tmp >= ctxt->input->base)
10991 *lastgt = tmp;
10992 else
10993 *lastgt = NULL;
10994 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010995 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010996 } else {
10997 *lastlt = NULL;
10998 *lastgt = NULL;
10999 }
11000}
11001/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011002 * xmlCheckCdataPush:
11003 * @cur: pointer to the bock of characters
11004 * @len: length of the block in bytes
11005 *
11006 * Check that the block of characters is okay as SCdata content [20]
11007 *
11008 * Returns the number of bytes to pass if okay, a negative index where an
11009 * UTF-8 error occured otherwise
11010 */
11011static int
11012xmlCheckCdataPush(const xmlChar *utf, int len) {
11013 int ix;
11014 unsigned char c;
11015 int codepoint;
11016
11017 if ((utf == NULL) || (len <= 0))
11018 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011019
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011020 for (ix = 0; ix < len;) { /* string is 0-terminated */
11021 c = utf[ix];
11022 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11023 if (c >= 0x20)
11024 ix++;
11025 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11026 ix++;
11027 else
11028 return(-ix);
11029 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11030 if (ix + 2 > len) return(ix);
11031 if ((utf[ix+1] & 0xc0 ) != 0x80)
11032 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011033 codepoint = (utf[ix] & 0x1f) << 6;
11034 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011035 if (!xmlIsCharQ(codepoint))
11036 return(-ix);
11037 ix += 2;
11038 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11039 if (ix + 3 > len) return(ix);
11040 if (((utf[ix+1] & 0xc0) != 0x80) ||
11041 ((utf[ix+2] & 0xc0) != 0x80))
11042 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011043 codepoint = (utf[ix] & 0xf) << 12;
11044 codepoint |= (utf[ix+1] & 0x3f) << 6;
11045 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011046 if (!xmlIsCharQ(codepoint))
11047 return(-ix);
11048 ix += 3;
11049 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11050 if (ix + 4 > len) return(ix);
11051 if (((utf[ix+1] & 0xc0) != 0x80) ||
11052 ((utf[ix+2] & 0xc0) != 0x80) ||
11053 ((utf[ix+3] & 0xc0) != 0x80))
11054 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011055 codepoint = (utf[ix] & 0x7) << 18;
11056 codepoint |= (utf[ix+1] & 0x3f) << 12;
11057 codepoint |= (utf[ix+2] & 0x3f) << 6;
11058 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011059 if (!xmlIsCharQ(codepoint))
11060 return(-ix);
11061 ix += 4;
11062 } else /* unknown encoding */
11063 return(-ix);
11064 }
11065 return(ix);
11066}
11067
11068/**
Owen Taylor3473f882001-02-23 17:55:21 +000011069 * xmlParseTryOrFinish:
11070 * @ctxt: an XML parser context
11071 * @terminate: last chunk indicator
11072 *
11073 * Try to progress on parsing
11074 *
11075 * Returns zero if no parsing was possible
11076 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011077static int
Owen Taylor3473f882001-02-23 17:55:21 +000011078xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11079 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011080 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011081 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011082 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011083
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011084 if (ctxt->input == NULL)
11085 return(0);
11086
Owen Taylor3473f882001-02-23 17:55:21 +000011087#ifdef DEBUG_PUSH
11088 switch (ctxt->instate) {
11089 case XML_PARSER_EOF:
11090 xmlGenericError(xmlGenericErrorContext,
11091 "PP: try EOF\n"); break;
11092 case XML_PARSER_START:
11093 xmlGenericError(xmlGenericErrorContext,
11094 "PP: try START\n"); break;
11095 case XML_PARSER_MISC:
11096 xmlGenericError(xmlGenericErrorContext,
11097 "PP: try MISC\n");break;
11098 case XML_PARSER_COMMENT:
11099 xmlGenericError(xmlGenericErrorContext,
11100 "PP: try COMMENT\n");break;
11101 case XML_PARSER_PROLOG:
11102 xmlGenericError(xmlGenericErrorContext,
11103 "PP: try PROLOG\n");break;
11104 case XML_PARSER_START_TAG:
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: try START_TAG\n");break;
11107 case XML_PARSER_CONTENT:
11108 xmlGenericError(xmlGenericErrorContext,
11109 "PP: try CONTENT\n");break;
11110 case XML_PARSER_CDATA_SECTION:
11111 xmlGenericError(xmlGenericErrorContext,
11112 "PP: try CDATA_SECTION\n");break;
11113 case XML_PARSER_END_TAG:
11114 xmlGenericError(xmlGenericErrorContext,
11115 "PP: try END_TAG\n");break;
11116 case XML_PARSER_ENTITY_DECL:
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: try ENTITY_DECL\n");break;
11119 case XML_PARSER_ENTITY_VALUE:
11120 xmlGenericError(xmlGenericErrorContext,
11121 "PP: try ENTITY_VALUE\n");break;
11122 case XML_PARSER_ATTRIBUTE_VALUE:
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: try ATTRIBUTE_VALUE\n");break;
11125 case XML_PARSER_DTD:
11126 xmlGenericError(xmlGenericErrorContext,
11127 "PP: try DTD\n");break;
11128 case XML_PARSER_EPILOG:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try EPILOG\n");break;
11131 case XML_PARSER_PI:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try PI\n");break;
11134 case XML_PARSER_IGNORE:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try IGNORE\n");break;
11137 }
11138#endif
11139
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011140 if ((ctxt->input != NULL) &&
11141 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011142 xmlSHRINK(ctxt);
11143 ctxt->checkIndex = 0;
11144 }
11145 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011146
Daniel Veillarda880b122003-04-21 21:36:41 +000011147 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000011148 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011149 return(0);
11150
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011151
Owen Taylor3473f882001-02-23 17:55:21 +000011152 /*
11153 * Pop-up of finished entities.
11154 */
11155 while ((RAW == 0) && (ctxt->inputNr > 1))
11156 xmlPopInput(ctxt);
11157
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011158 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011159 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011160 avail = ctxt->input->length -
11161 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011162 else {
11163 /*
11164 * If we are operating on converted input, try to flush
11165 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011166 * buffer. But do not do this in document start where
11167 * encoding="..." may not have been read and we work on a
11168 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011169 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011170 if ((ctxt->instate != XML_PARSER_START) &&
11171 (ctxt->input->buf->raw != NULL) &&
11172 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011173 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11174 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011175 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011176
11177 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011178 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11179 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011180 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011181 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011182 (ctxt->input->cur - ctxt->input->base);
11183 }
Owen Taylor3473f882001-02-23 17:55:21 +000011184 if (avail < 1)
11185 goto done;
11186 switch (ctxt->instate) {
11187 case XML_PARSER_EOF:
11188 /*
11189 * Document parsing is done !
11190 */
11191 goto done;
11192 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011193 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11194 xmlChar start[4];
11195 xmlCharEncoding enc;
11196
11197 /*
11198 * Very first chars read from the document flow.
11199 */
11200 if (avail < 4)
11201 goto done;
11202
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011203 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011204 * Get the 4 first bytes and decode the charset
11205 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011206 * plug some encoding conversion routines,
11207 * else xmlSwitchEncoding will set to (default)
11208 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011209 */
11210 start[0] = RAW;
11211 start[1] = NXT(1);
11212 start[2] = NXT(2);
11213 start[3] = NXT(3);
11214 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011215 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011216 break;
11217 }
Owen Taylor3473f882001-02-23 17:55:21 +000011218
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011219 if (avail < 2)
11220 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011221 cur = ctxt->input->cur[0];
11222 next = ctxt->input->cur[1];
11223 if (cur == 0) {
11224 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11225 ctxt->sax->setDocumentLocator(ctxt->userData,
11226 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011227 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011228 ctxt->instate = XML_PARSER_EOF;
11229#ifdef DEBUG_PUSH
11230 xmlGenericError(xmlGenericErrorContext,
11231 "PP: entering EOF\n");
11232#endif
11233 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11234 ctxt->sax->endDocument(ctxt->userData);
11235 goto done;
11236 }
11237 if ((cur == '<') && (next == '?')) {
11238 /* PI or XML decl */
11239 if (avail < 5) return(ret);
11240 if ((!terminate) &&
11241 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11242 return(ret);
11243 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11244 ctxt->sax->setDocumentLocator(ctxt->userData,
11245 &xmlDefaultSAXLocator);
11246 if ((ctxt->input->cur[2] == 'x') &&
11247 (ctxt->input->cur[3] == 'm') &&
11248 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011249 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011250 ret += 5;
11251#ifdef DEBUG_PUSH
11252 xmlGenericError(xmlGenericErrorContext,
11253 "PP: Parsing XML Decl\n");
11254#endif
11255 xmlParseXMLDecl(ctxt);
11256 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11257 /*
11258 * The XML REC instructs us to stop parsing right
11259 * here
11260 */
11261 ctxt->instate = XML_PARSER_EOF;
11262 return(0);
11263 }
11264 ctxt->standalone = ctxt->input->standalone;
11265 if ((ctxt->encoding == NULL) &&
11266 (ctxt->input->encoding != NULL))
11267 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11268 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11269 (!ctxt->disableSAX))
11270 ctxt->sax->startDocument(ctxt->userData);
11271 ctxt->instate = XML_PARSER_MISC;
11272#ifdef DEBUG_PUSH
11273 xmlGenericError(xmlGenericErrorContext,
11274 "PP: entering MISC\n");
11275#endif
11276 } else {
11277 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11278 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11279 (!ctxt->disableSAX))
11280 ctxt->sax->startDocument(ctxt->userData);
11281 ctxt->instate = XML_PARSER_MISC;
11282#ifdef DEBUG_PUSH
11283 xmlGenericError(xmlGenericErrorContext,
11284 "PP: entering MISC\n");
11285#endif
11286 }
11287 } else {
11288 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11289 ctxt->sax->setDocumentLocator(ctxt->userData,
11290 &xmlDefaultSAXLocator);
11291 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011292 if (ctxt->version == NULL) {
11293 xmlErrMemory(ctxt, NULL);
11294 break;
11295 }
Owen Taylor3473f882001-02-23 17:55:21 +000011296 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11297 (!ctxt->disableSAX))
11298 ctxt->sax->startDocument(ctxt->userData);
11299 ctxt->instate = XML_PARSER_MISC;
11300#ifdef DEBUG_PUSH
11301 xmlGenericError(xmlGenericErrorContext,
11302 "PP: entering MISC\n");
11303#endif
11304 }
11305 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011306 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011307 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011308 const xmlChar *prefix = NULL;
11309 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011310 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011311
11312 if ((avail < 2) && (ctxt->inputNr == 1))
11313 goto done;
11314 cur = ctxt->input->cur[0];
11315 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011316 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011317 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011318 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11319 ctxt->sax->endDocument(ctxt->userData);
11320 goto done;
11321 }
11322 if (!terminate) {
11323 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011324 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011325 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011326 goto done;
11327 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11328 goto done;
11329 }
11330 }
11331 if (ctxt->spaceNr == 0)
11332 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011333 else if (*ctxt->space == -2)
11334 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011335 else
11336 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011337#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011338 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011339#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011340 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011341#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011342 else
11343 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011344#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011345 if (ctxt->instate == XML_PARSER_EOF)
11346 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011347 if (name == NULL) {
11348 spacePop(ctxt);
11349 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011350 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11351 ctxt->sax->endDocument(ctxt->userData);
11352 goto done;
11353 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011354#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011355 /*
11356 * [ VC: Root Element Type ]
11357 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011358 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011359 */
11360 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11361 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11362 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011363#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011364
11365 /*
11366 * Check for an Empty Element.
11367 */
11368 if ((RAW == '/') && (NXT(1) == '>')) {
11369 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011370
11371 if (ctxt->sax2) {
11372 if ((ctxt->sax != NULL) &&
11373 (ctxt->sax->endElementNs != NULL) &&
11374 (!ctxt->disableSAX))
11375 ctxt->sax->endElementNs(ctxt->userData, name,
11376 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011377 if (ctxt->nsNr - nsNr > 0)
11378 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011379#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011380 } else {
11381 if ((ctxt->sax != NULL) &&
11382 (ctxt->sax->endElement != NULL) &&
11383 (!ctxt->disableSAX))
11384 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011385#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011386 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011387 spacePop(ctxt);
11388 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011389 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011390 } else {
11391 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011392 }
Daniel Veillard65686452012-07-19 18:25:01 +080011393 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011394 break;
11395 }
11396 if (RAW == '>') {
11397 NEXT;
11398 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011399 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011400 "Couldn't find end of Start Tag %s\n",
11401 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011402 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011403 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011404 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011405 if (ctxt->sax2)
11406 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011407#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011408 else
11409 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011410#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011411
Daniel Veillarda880b122003-04-21 21:36:41 +000011412 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011413 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011414 break;
11415 }
11416 case XML_PARSER_CONTENT: {
11417 const xmlChar *test;
11418 unsigned int cons;
11419 if ((avail < 2) && (ctxt->inputNr == 1))
11420 goto done;
11421 cur = ctxt->input->cur[0];
11422 next = ctxt->input->cur[1];
11423
11424 test = CUR_PTR;
11425 cons = ctxt->input->consumed;
11426 if ((cur == '<') && (next == '/')) {
11427 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011428 break;
11429 } else if ((cur == '<') && (next == '?')) {
11430 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011431 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11432 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011433 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011434 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011435 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011436 ctxt->instate = XML_PARSER_CONTENT;
11437 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011438 } else if ((cur == '<') && (next != '!')) {
11439 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011440 break;
11441 } else if ((cur == '<') && (next == '!') &&
11442 (ctxt->input->cur[2] == '-') &&
11443 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011444 int term;
11445
11446 if (avail < 4)
11447 goto done;
11448 ctxt->input->cur += 4;
11449 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11450 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011451 if ((!terminate) && (term < 0)) {
11452 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011453 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011454 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011455 xmlParseComment(ctxt);
11456 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011457 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011458 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11459 (ctxt->input->cur[2] == '[') &&
11460 (ctxt->input->cur[3] == 'C') &&
11461 (ctxt->input->cur[4] == 'D') &&
11462 (ctxt->input->cur[5] == 'A') &&
11463 (ctxt->input->cur[6] == 'T') &&
11464 (ctxt->input->cur[7] == 'A') &&
11465 (ctxt->input->cur[8] == '[')) {
11466 SKIP(9);
11467 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011468 break;
11469 } else if ((cur == '<') && (next == '!') &&
11470 (avail < 9)) {
11471 goto done;
11472 } else if (cur == '&') {
11473 if ((!terminate) &&
11474 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11475 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011476 xmlParseReference(ctxt);
11477 } else {
11478 /* TODO Avoid the extra copy, handle directly !!! */
11479 /*
11480 * Goal of the following test is:
11481 * - minimize calls to the SAX 'character' callback
11482 * when they are mergeable
11483 * - handle an problem for isBlank when we only parse
11484 * a sequence of blank chars and the next one is
11485 * not available to check against '<' presence.
11486 * - tries to homogenize the differences in SAX
11487 * callbacks between the push and pull versions
11488 * of the parser.
11489 */
11490 if ((ctxt->inputNr == 1) &&
11491 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11492 if (!terminate) {
11493 if (ctxt->progressive) {
11494 if ((lastlt == NULL) ||
11495 (ctxt->input->cur > lastlt))
11496 goto done;
11497 } else if (xmlParseLookupSequence(ctxt,
11498 '<', 0, 0) < 0) {
11499 goto done;
11500 }
11501 }
11502 }
11503 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011504 xmlParseCharData(ctxt, 0);
11505 }
11506 /*
11507 * Pop-up of finished entities.
11508 */
11509 while ((RAW == 0) && (ctxt->inputNr > 1))
11510 xmlPopInput(ctxt);
11511 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011512 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11513 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011514 ctxt->instate = XML_PARSER_EOF;
11515 break;
11516 }
11517 break;
11518 }
11519 case XML_PARSER_END_TAG:
11520 if (avail < 2)
11521 goto done;
11522 if (!terminate) {
11523 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011524 /* > can be found unescaped in attribute values */
11525 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011526 goto done;
11527 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11528 goto done;
11529 }
11530 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011531 if (ctxt->sax2) {
11532 xmlParseEndTag2(ctxt,
11533 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11534 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011535 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011536 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011537 }
11538#ifdef LIBXML_SAX1_ENABLED
11539 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011540 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011541#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011542 if (ctxt->instate == XML_PARSER_EOF) {
11543 /* Nothing */
11544 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011545 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011546 } else {
11547 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 }
11549 break;
11550 case XML_PARSER_CDATA_SECTION: {
11551 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011552 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011553 * cdataBlock merge back contiguous callbacks.
11554 */
11555 int base;
11556
11557 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11558 if (base < 0) {
11559 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011560 int tmp;
11561
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011562 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011563 XML_PARSER_BIG_BUFFER_SIZE);
11564 if (tmp < 0) {
11565 tmp = -tmp;
11566 ctxt->input->cur += tmp;
11567 goto encoding_error;
11568 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11570 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011571 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011572 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011573 else if (ctxt->sax->characters != NULL)
11574 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011575 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011576 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011577 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011578 ctxt->checkIndex = 0;
11579 }
11580 goto done;
11581 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011582 int tmp;
11583
11584 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11585 if ((tmp < 0) || (tmp != base)) {
11586 tmp = -tmp;
11587 ctxt->input->cur += tmp;
11588 goto encoding_error;
11589 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011590 if ((ctxt->sax != NULL) && (base == 0) &&
11591 (ctxt->sax->cdataBlock != NULL) &&
11592 (!ctxt->disableSAX)) {
11593 /*
11594 * Special case to provide identical behaviour
11595 * between pull and push parsers on enpty CDATA
11596 * sections
11597 */
11598 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11599 (!strncmp((const char *)&ctxt->input->cur[-9],
11600 "<![CDATA[", 9)))
11601 ctxt->sax->cdataBlock(ctxt->userData,
11602 BAD_CAST "", 0);
11603 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011604 (!ctxt->disableSAX)) {
11605 if (ctxt->sax->cdataBlock != NULL)
11606 ctxt->sax->cdataBlock(ctxt->userData,
11607 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011608 else if (ctxt->sax->characters != NULL)
11609 ctxt->sax->characters(ctxt->userData,
11610 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011611 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011612 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011613 ctxt->checkIndex = 0;
11614 ctxt->instate = XML_PARSER_CONTENT;
11615#ifdef DEBUG_PUSH
11616 xmlGenericError(xmlGenericErrorContext,
11617 "PP: entering CONTENT\n");
11618#endif
11619 }
11620 break;
11621 }
Owen Taylor3473f882001-02-23 17:55:21 +000011622 case XML_PARSER_MISC:
11623 SKIP_BLANKS;
11624 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011625 avail = ctxt->input->length -
11626 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011627 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011628 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011629 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011630 if (avail < 2)
11631 goto done;
11632 cur = ctxt->input->cur[0];
11633 next = ctxt->input->cur[1];
11634 if ((cur == '<') && (next == '?')) {
11635 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011636 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11637 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011638 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011639 }
Owen Taylor3473f882001-02-23 17:55:21 +000011640#ifdef DEBUG_PUSH
11641 xmlGenericError(xmlGenericErrorContext,
11642 "PP: Parsing PI\n");
11643#endif
11644 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011645 ctxt->instate = XML_PARSER_MISC;
11646 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011647 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011648 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011649 (ctxt->input->cur[2] == '-') &&
11650 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011651 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011652 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11653 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011654 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011655 }
Owen Taylor3473f882001-02-23 17:55:21 +000011656#ifdef DEBUG_PUSH
11657 xmlGenericError(xmlGenericErrorContext,
11658 "PP: Parsing Comment\n");
11659#endif
11660 xmlParseComment(ctxt);
11661 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011662 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011663 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011664 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011665 (ctxt->input->cur[2] == 'D') &&
11666 (ctxt->input->cur[3] == 'O') &&
11667 (ctxt->input->cur[4] == 'C') &&
11668 (ctxt->input->cur[5] == 'T') &&
11669 (ctxt->input->cur[6] == 'Y') &&
11670 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011671 (ctxt->input->cur[8] == 'E')) {
11672 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011673 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11674 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011675 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011676 }
Owen Taylor3473f882001-02-23 17:55:21 +000011677#ifdef DEBUG_PUSH
11678 xmlGenericError(xmlGenericErrorContext,
11679 "PP: Parsing internal subset\n");
11680#endif
11681 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011682 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011683 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011684 xmlParseDocTypeDecl(ctxt);
11685 if (RAW == '[') {
11686 ctxt->instate = XML_PARSER_DTD;
11687#ifdef DEBUG_PUSH
11688 xmlGenericError(xmlGenericErrorContext,
11689 "PP: entering DTD\n");
11690#endif
11691 } else {
11692 /*
11693 * Create and update the external subset.
11694 */
11695 ctxt->inSubset = 2;
11696 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11697 (ctxt->sax->externalSubset != NULL))
11698 ctxt->sax->externalSubset(ctxt->userData,
11699 ctxt->intSubName, ctxt->extSubSystem,
11700 ctxt->extSubURI);
11701 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011702 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011703 ctxt->instate = XML_PARSER_PROLOG;
11704#ifdef DEBUG_PUSH
11705 xmlGenericError(xmlGenericErrorContext,
11706 "PP: entering PROLOG\n");
11707#endif
11708 }
11709 } else if ((cur == '<') && (next == '!') &&
11710 (avail < 9)) {
11711 goto done;
11712 } else {
11713 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011714 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011715 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011716#ifdef DEBUG_PUSH
11717 xmlGenericError(xmlGenericErrorContext,
11718 "PP: entering START_TAG\n");
11719#endif
11720 }
11721 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011722 case XML_PARSER_PROLOG:
11723 SKIP_BLANKS;
11724 if (ctxt->input->buf == NULL)
11725 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11726 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011727 avail = xmlBufUse(ctxt->input->buf->buffer) -
11728 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011729 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011730 goto done;
11731 cur = ctxt->input->cur[0];
11732 next = ctxt->input->cur[1];
11733 if ((cur == '<') && (next == '?')) {
11734 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011735 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11736 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011737 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011738 }
Owen Taylor3473f882001-02-23 17:55:21 +000011739#ifdef DEBUG_PUSH
11740 xmlGenericError(xmlGenericErrorContext,
11741 "PP: Parsing PI\n");
11742#endif
11743 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011744 ctxt->instate = XML_PARSER_PROLOG;
11745 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011746 } else if ((cur == '<') && (next == '!') &&
11747 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11748 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011749 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11750 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011751 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011752 }
Owen Taylor3473f882001-02-23 17:55:21 +000011753#ifdef DEBUG_PUSH
11754 xmlGenericError(xmlGenericErrorContext,
11755 "PP: Parsing Comment\n");
11756#endif
11757 xmlParseComment(ctxt);
11758 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011759 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011760 } else if ((cur == '<') && (next == '!') &&
11761 (avail < 4)) {
11762 goto done;
11763 } else {
11764 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011765 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011766 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011767 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011768#ifdef DEBUG_PUSH
11769 xmlGenericError(xmlGenericErrorContext,
11770 "PP: entering START_TAG\n");
11771#endif
11772 }
11773 break;
11774 case XML_PARSER_EPILOG:
11775 SKIP_BLANKS;
11776 if (ctxt->input->buf == NULL)
11777 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11778 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011779 avail = xmlBufUse(ctxt->input->buf->buffer) -
11780 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011781 if (avail < 2)
11782 goto done;
11783 cur = ctxt->input->cur[0];
11784 next = ctxt->input->cur[1];
11785 if ((cur == '<') && (next == '?')) {
11786 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011787 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11788 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011789 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011790 }
Owen Taylor3473f882001-02-23 17:55:21 +000011791#ifdef DEBUG_PUSH
11792 xmlGenericError(xmlGenericErrorContext,
11793 "PP: Parsing PI\n");
11794#endif
11795 xmlParsePI(ctxt);
11796 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011797 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011798 } else if ((cur == '<') && (next == '!') &&
11799 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11800 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011801 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11802 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011803 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011804 }
Owen Taylor3473f882001-02-23 17:55:21 +000011805#ifdef DEBUG_PUSH
11806 xmlGenericError(xmlGenericErrorContext,
11807 "PP: Parsing Comment\n");
11808#endif
11809 xmlParseComment(ctxt);
11810 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011811 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011812 } else if ((cur == '<') && (next == '!') &&
11813 (avail < 4)) {
11814 goto done;
11815 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011816 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011817 ctxt->instate = XML_PARSER_EOF;
11818#ifdef DEBUG_PUSH
11819 xmlGenericError(xmlGenericErrorContext,
11820 "PP: entering EOF\n");
11821#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011822 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011823 ctxt->sax->endDocument(ctxt->userData);
11824 goto done;
11825 }
11826 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011827 case XML_PARSER_DTD: {
11828 /*
11829 * Sorry but progressive parsing of the internal subset
11830 * is not expected to be supported. We first check that
11831 * the full content of the internal subset is available and
11832 * the parsing is launched only at that point.
11833 * Internal subset ends up with "']' S? '>'" in an unescaped
11834 * section and not in a ']]>' sequence which are conditional
11835 * sections (whoever argued to keep that crap in XML deserve
11836 * a place in hell !).
11837 */
11838 int base, i;
11839 xmlChar *buf;
11840 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011841 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011842
11843 base = ctxt->input->cur - ctxt->input->base;
11844 if (base < 0) return(0);
11845 if (ctxt->checkIndex > base)
11846 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011847 buf = xmlBufContent(ctxt->input->buf->buffer);
11848 use = xmlBufUse(ctxt->input->buf->buffer);
11849 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011850 if (quote != 0) {
11851 if (buf[base] == quote)
11852 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011853 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011854 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011855 if ((quote == 0) && (buf[base] == '<')) {
11856 int found = 0;
11857 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011858 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011859 (buf[base + 1] == '!') &&
11860 (buf[base + 2] == '-') &&
11861 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011862 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011863 if ((buf[base] == '-') &&
11864 (buf[base + 1] == '-') &&
11865 (buf[base + 2] == '>')) {
11866 found = 1;
11867 base += 2;
11868 break;
11869 }
11870 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011871 if (!found) {
11872#if 0
11873 fprintf(stderr, "unfinished comment\n");
11874#endif
11875 break; /* for */
11876 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011877 continue;
11878 }
11879 }
Owen Taylor3473f882001-02-23 17:55:21 +000011880 if (buf[base] == '"') {
11881 quote = '"';
11882 continue;
11883 }
11884 if (buf[base] == '\'') {
11885 quote = '\'';
11886 continue;
11887 }
11888 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011889#if 0
11890 fprintf(stderr, "%c%c%c%c: ", buf[base],
11891 buf[base + 1], buf[base + 2], buf[base + 3]);
11892#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011893 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011894 break;
11895 if (buf[base + 1] == ']') {
11896 /* conditional crap, skip both ']' ! */
11897 base++;
11898 continue;
11899 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011900 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011901 if (buf[base + i] == '>') {
11902#if 0
11903 fprintf(stderr, "found\n");
11904#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011905 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011906 }
11907 if (!IS_BLANK_CH(buf[base + i])) {
11908#if 0
11909 fprintf(stderr, "not found\n");
11910#endif
11911 goto not_end_of_int_subset;
11912 }
Owen Taylor3473f882001-02-23 17:55:21 +000011913 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011914#if 0
11915 fprintf(stderr, "end of stream\n");
11916#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011917 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011918
Owen Taylor3473f882001-02-23 17:55:21 +000011919 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011920not_end_of_int_subset:
11921 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011922 }
11923 /*
11924 * We didn't found the end of the Internal subset
11925 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011926 if (quote == 0)
11927 ctxt->checkIndex = base;
11928 else
11929 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011930#ifdef DEBUG_PUSH
11931 if (next == 0)
11932 xmlGenericError(xmlGenericErrorContext,
11933 "PP: lookup of int subset end filed\n");
11934#endif
11935 goto done;
11936
11937found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011938 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011939 xmlParseInternalSubset(ctxt);
11940 ctxt->inSubset = 2;
11941 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11942 (ctxt->sax->externalSubset != NULL))
11943 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11944 ctxt->extSubSystem, ctxt->extSubURI);
11945 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011946 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011947 ctxt->instate = XML_PARSER_PROLOG;
11948 ctxt->checkIndex = 0;
11949#ifdef DEBUG_PUSH
11950 xmlGenericError(xmlGenericErrorContext,
11951 "PP: entering PROLOG\n");
11952#endif
11953 break;
11954 }
11955 case XML_PARSER_COMMENT:
11956 xmlGenericError(xmlGenericErrorContext,
11957 "PP: internal error, state == COMMENT\n");
11958 ctxt->instate = XML_PARSER_CONTENT;
11959#ifdef DEBUG_PUSH
11960 xmlGenericError(xmlGenericErrorContext,
11961 "PP: entering CONTENT\n");
11962#endif
11963 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011964 case XML_PARSER_IGNORE:
11965 xmlGenericError(xmlGenericErrorContext,
11966 "PP: internal error, state == IGNORE");
11967 ctxt->instate = XML_PARSER_DTD;
11968#ifdef DEBUG_PUSH
11969 xmlGenericError(xmlGenericErrorContext,
11970 "PP: entering DTD\n");
11971#endif
11972 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011973 case XML_PARSER_PI:
11974 xmlGenericError(xmlGenericErrorContext,
11975 "PP: internal error, state == PI\n");
11976 ctxt->instate = XML_PARSER_CONTENT;
11977#ifdef DEBUG_PUSH
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: entering CONTENT\n");
11980#endif
11981 break;
11982 case XML_PARSER_ENTITY_DECL:
11983 xmlGenericError(xmlGenericErrorContext,
11984 "PP: internal error, state == ENTITY_DECL\n");
11985 ctxt->instate = XML_PARSER_DTD;
11986#ifdef DEBUG_PUSH
11987 xmlGenericError(xmlGenericErrorContext,
11988 "PP: entering DTD\n");
11989#endif
11990 break;
11991 case XML_PARSER_ENTITY_VALUE:
11992 xmlGenericError(xmlGenericErrorContext,
11993 "PP: internal error, state == ENTITY_VALUE\n");
11994 ctxt->instate = XML_PARSER_CONTENT;
11995#ifdef DEBUG_PUSH
11996 xmlGenericError(xmlGenericErrorContext,
11997 "PP: entering DTD\n");
11998#endif
11999 break;
12000 case XML_PARSER_ATTRIBUTE_VALUE:
12001 xmlGenericError(xmlGenericErrorContext,
12002 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12003 ctxt->instate = XML_PARSER_START_TAG;
12004#ifdef DEBUG_PUSH
12005 xmlGenericError(xmlGenericErrorContext,
12006 "PP: entering START_TAG\n");
12007#endif
12008 break;
12009 case XML_PARSER_SYSTEM_LITERAL:
12010 xmlGenericError(xmlGenericErrorContext,
12011 "PP: internal error, state == SYSTEM_LITERAL\n");
12012 ctxt->instate = XML_PARSER_START_TAG;
12013#ifdef DEBUG_PUSH
12014 xmlGenericError(xmlGenericErrorContext,
12015 "PP: entering START_TAG\n");
12016#endif
12017 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012018 case XML_PARSER_PUBLIC_LITERAL:
12019 xmlGenericError(xmlGenericErrorContext,
12020 "PP: internal error, state == PUBLIC_LITERAL\n");
12021 ctxt->instate = XML_PARSER_START_TAG;
12022#ifdef DEBUG_PUSH
12023 xmlGenericError(xmlGenericErrorContext,
12024 "PP: entering START_TAG\n");
12025#endif
12026 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012027 }
12028 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012029done:
Owen Taylor3473f882001-02-23 17:55:21 +000012030#ifdef DEBUG_PUSH
12031 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12032#endif
12033 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012034encoding_error:
12035 {
12036 char buffer[150];
12037
12038 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12039 ctxt->input->cur[0], ctxt->input->cur[1],
12040 ctxt->input->cur[2], ctxt->input->cur[3]);
12041 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12042 "Input is not proper UTF-8, indicate encoding !\n%s",
12043 BAD_CAST buffer, NULL);
12044 }
12045 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012046}
12047
12048/**
Daniel Veillard65686452012-07-19 18:25:01 +080012049 * xmlParseCheckTransition:
12050 * @ctxt: an XML parser context
12051 * @chunk: a char array
12052 * @size: the size in byte of the chunk
12053 *
12054 * Check depending on the current parser state if the chunk given must be
12055 * processed immediately or one need more data to advance on parsing.
12056 *
12057 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12058 */
12059static int
12060xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12061 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12062 return(-1);
12063 if (ctxt->instate == XML_PARSER_START_TAG) {
12064 if (memchr(chunk, '>', size) != NULL)
12065 return(1);
12066 return(0);
12067 }
12068 if (ctxt->progressive == XML_PARSER_COMMENT) {
12069 if (memchr(chunk, '>', size) != NULL)
12070 return(1);
12071 return(0);
12072 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012073 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12074 if (memchr(chunk, '>', size) != NULL)
12075 return(1);
12076 return(0);
12077 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012078 if (ctxt->progressive == XML_PARSER_PI) {
12079 if (memchr(chunk, '>', size) != NULL)
12080 return(1);
12081 return(0);
12082 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012083 if (ctxt->instate == XML_PARSER_END_TAG) {
12084 if (memchr(chunk, '>', size) != NULL)
12085 return(1);
12086 return(0);
12087 }
12088 if ((ctxt->progressive == XML_PARSER_DTD) ||
12089 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012090 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012091 return(1);
12092 return(0);
12093 }
Daniel Veillard65686452012-07-19 18:25:01 +080012094 return(1);
12095}
12096
12097/**
Owen Taylor3473f882001-02-23 17:55:21 +000012098 * xmlParseChunk:
12099 * @ctxt: an XML parser context
12100 * @chunk: an char array
12101 * @size: the size in byte of the chunk
12102 * @terminate: last chunk indicator
12103 *
12104 * Parse a Chunk of memory
12105 *
12106 * Returns zero if no error, the xmlParserErrors otherwise.
12107 */
12108int
12109xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12110 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012111 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012112 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012113 size_t old_avail = 0;
12114 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012115
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012116 if (ctxt == NULL)
12117 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012118 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012119 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012120 if (ctxt->instate == XML_PARSER_EOF)
12121 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012122 if (ctxt->instate == XML_PARSER_START)
12123 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012124 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12125 (chunk[size - 1] == '\r')) {
12126 end_in_lf = 1;
12127 size--;
12128 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012129
12130xmldecl_done:
12131
Owen Taylor3473f882001-02-23 17:55:21 +000012132 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12133 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012134 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12135 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012136 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012137
Daniel Veillard65686452012-07-19 18:25:01 +080012138 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012139 /*
12140 * Specific handling if we autodetected an encoding, we should not
12141 * push more than the first line ... which depend on the encoding
12142 * And only push the rest once the final encoding was detected
12143 */
12144 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12145 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012146 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012147
12148 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12149 BAD_CAST "UTF-16")) ||
12150 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12151 BAD_CAST "UTF16")))
12152 len = 90;
12153 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12154 BAD_CAST "UCS-4")) ||
12155 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12156 BAD_CAST "UCS4")))
12157 len = 180;
12158
12159 if (ctxt->input->buf->rawconsumed < len)
12160 len -= ctxt->input->buf->rawconsumed;
12161
Raul Hudeaba9716a2010-03-15 10:13:29 +010012162 /*
12163 * Change size for reading the initial declaration only
12164 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12165 * will blindly copy extra bytes from memory.
12166 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012167 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012168 remain = size - len;
12169 size = len;
12170 } else {
12171 remain = 0;
12172 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012173 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012174 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012175 if (res < 0) {
12176 ctxt->errNo = XML_PARSER_EOF;
12177 ctxt->disableSAX = 1;
12178 return (XML_PARSER_EOF);
12179 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012180 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012181#ifdef DEBUG_PUSH
12182 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12183#endif
12184
Owen Taylor3473f882001-02-23 17:55:21 +000012185 } else if (ctxt->instate != XML_PARSER_EOF) {
12186 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12187 xmlParserInputBufferPtr in = ctxt->input->buf;
12188 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12189 (in->raw != NULL)) {
12190 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012191 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12192 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012193
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012194 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012195 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012196 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012197 xmlGenericError(xmlGenericErrorContext,
12198 "xmlParseChunk: encoder error\n");
12199 return(XML_ERR_INVALID_ENCODING);
12200 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012201 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012202 }
12203 }
12204 }
Daniel Veillard65686452012-07-19 18:25:01 +080012205 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012206 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012207 } else {
12208 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12209 avail = xmlBufUse(ctxt->input->buf->buffer);
12210 /*
12211 * Depending on the current state it may not be such
12212 * a good idea to try parsing if there is nothing in the chunk
12213 * which would be worth doing a parser state transition and we
12214 * need to wait for more data
12215 */
12216 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12217 (old_avail == 0) || (avail == 0) ||
12218 (xmlParseCheckTransition(ctxt,
12219 (const char *)&ctxt->input->base[old_avail],
12220 avail - old_avail)))
12221 xmlParseTryOrFinish(ctxt, terminate);
12222 }
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012223 if ((ctxt->input != NULL) &&
12224 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12225 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12226 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12227 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12228 ctxt->instate = XML_PARSER_EOF;
12229 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012230 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12231 return(ctxt->errNo);
12232
12233 if (remain != 0) {
12234 chunk += size;
12235 size = remain;
12236 remain = 0;
12237 goto xmldecl_done;
12238 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012239 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12240 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012241 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12242 ctxt->input);
12243 size_t current = ctxt->input->cur - ctxt->input->base;
12244
Daniel Veillarda617e242006-01-09 14:38:44 +000012245 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012246
12247 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12248 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012249 }
Owen Taylor3473f882001-02-23 17:55:21 +000012250 if (terminate) {
12251 /*
12252 * Check for termination
12253 */
Daniel Veillard65686452012-07-19 18:25:01 +080012254 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012255
12256 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012257 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012258 cur_avail = ctxt->input->length -
12259 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012260 else
Daniel Veillard65686452012-07-19 18:25:01 +080012261 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12262 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012263 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012264
Owen Taylor3473f882001-02-23 17:55:21 +000012265 if ((ctxt->instate != XML_PARSER_EOF) &&
12266 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012267 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012268 }
Daniel Veillard65686452012-07-19 18:25:01 +080012269 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012270 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012271 }
Owen Taylor3473f882001-02-23 17:55:21 +000012272 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012273 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012274 ctxt->sax->endDocument(ctxt->userData);
12275 }
12276 ctxt->instate = XML_PARSER_EOF;
12277 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012278 if (ctxt->wellFormed == 0)
12279 return((xmlParserErrors) ctxt->errNo);
12280 else
12281 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012282}
12283
12284/************************************************************************
12285 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012286 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012287 * *
12288 ************************************************************************/
12289
12290/**
Owen Taylor3473f882001-02-23 17:55:21 +000012291 * xmlCreatePushParserCtxt:
12292 * @sax: a SAX handler
12293 * @user_data: The user data returned on SAX callbacks
12294 * @chunk: a pointer to an array of chars
12295 * @size: number of chars in the array
12296 * @filename: an optional file name or URI
12297 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012298 * Create a parser context for using the XML parser in push mode.
12299 * If @buffer and @size are non-NULL, the data is used to detect
12300 * the encoding. The remaining characters will be parsed so they
12301 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012302 * To allow content encoding detection, @size should be >= 4
12303 * The value of @filename is used for fetching external entities
12304 * and error/warning reports.
12305 *
12306 * Returns the new parser context or NULL
12307 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012308
Owen Taylor3473f882001-02-23 17:55:21 +000012309xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012310xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012311 const char *chunk, int size, const char *filename) {
12312 xmlParserCtxtPtr ctxt;
12313 xmlParserInputPtr inputStream;
12314 xmlParserInputBufferPtr buf;
12315 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12316
12317 /*
12318 * plug some encoding conversion routines
12319 */
12320 if ((chunk != NULL) && (size >= 4))
12321 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12322
12323 buf = xmlAllocParserInputBuffer(enc);
12324 if (buf == NULL) return(NULL);
12325
12326 ctxt = xmlNewParserCtxt();
12327 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012328 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012329 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012330 return(NULL);
12331 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012332 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012333 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12334 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012335 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012336 xmlFreeParserInputBuffer(buf);
12337 xmlFreeParserCtxt(ctxt);
12338 return(NULL);
12339 }
Owen Taylor3473f882001-02-23 17:55:21 +000012340 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012341#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012342 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012343#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012344 xmlFree(ctxt->sax);
12345 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12346 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012347 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012348 xmlFreeParserInputBuffer(buf);
12349 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012350 return(NULL);
12351 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012352 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12353 if (sax->initialized == XML_SAX2_MAGIC)
12354 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12355 else
12356 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012357 if (user_data != NULL)
12358 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012359 }
Owen Taylor3473f882001-02-23 17:55:21 +000012360 if (filename == NULL) {
12361 ctxt->directory = NULL;
12362 } else {
12363 ctxt->directory = xmlParserGetDirectory(filename);
12364 }
12365
12366 inputStream = xmlNewInputStream(ctxt);
12367 if (inputStream == NULL) {
12368 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012369 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012370 return(NULL);
12371 }
12372
12373 if (filename == NULL)
12374 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012375 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012376 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012377 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012378 if (inputStream->filename == NULL) {
12379 xmlFreeParserCtxt(ctxt);
12380 xmlFreeParserInputBuffer(buf);
12381 return(NULL);
12382 }
12383 }
Owen Taylor3473f882001-02-23 17:55:21 +000012384 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012385 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012386 inputPush(ctxt, inputStream);
12387
William M. Brack3a1cd212005-02-11 14:35:54 +000012388 /*
12389 * If the caller didn't provide an initial 'chunk' for determining
12390 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12391 * that it can be automatically determined later
12392 */
12393 if ((size == 0) || (chunk == NULL)) {
12394 ctxt->charset = XML_CHAR_ENCODING_NONE;
12395 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012396 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12397 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012398
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012399 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012400
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012401 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012402#ifdef DEBUG_PUSH
12403 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12404#endif
12405 }
12406
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012407 if (enc != XML_CHAR_ENCODING_NONE) {
12408 xmlSwitchEncoding(ctxt, enc);
12409 }
12410
Owen Taylor3473f882001-02-23 17:55:21 +000012411 return(ctxt);
12412}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012413#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012414
12415/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012416 * xmlStopParser:
12417 * @ctxt: an XML parser context
12418 *
12419 * Blocks further parser processing
12420 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012421void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012422xmlStopParser(xmlParserCtxtPtr ctxt) {
12423 if (ctxt == NULL)
12424 return;
12425 ctxt->instate = XML_PARSER_EOF;
12426 ctxt->disableSAX = 1;
12427 if (ctxt->input != NULL) {
12428 ctxt->input->cur = BAD_CAST"";
12429 ctxt->input->base = ctxt->input->cur;
12430 }
12431}
12432
12433/**
Owen Taylor3473f882001-02-23 17:55:21 +000012434 * xmlCreateIOParserCtxt:
12435 * @sax: a SAX handler
12436 * @user_data: The user data returned on SAX callbacks
12437 * @ioread: an I/O read function
12438 * @ioclose: an I/O close function
12439 * @ioctx: an I/O handler
12440 * @enc: the charset encoding if known
12441 *
12442 * Create a parser context for using the XML parser with an existing
12443 * I/O stream
12444 *
12445 * Returns the new parser context or NULL
12446 */
12447xmlParserCtxtPtr
12448xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12449 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12450 void *ioctx, xmlCharEncoding enc) {
12451 xmlParserCtxtPtr ctxt;
12452 xmlParserInputPtr inputStream;
12453 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012454
Daniel Veillard42595322004-11-08 10:52:06 +000012455 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012456
12457 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012458 if (buf == NULL) {
12459 if (ioclose != NULL)
12460 ioclose(ioctx);
12461 return (NULL);
12462 }
Owen Taylor3473f882001-02-23 17:55:21 +000012463
12464 ctxt = xmlNewParserCtxt();
12465 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012466 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012467 return(NULL);
12468 }
12469 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012470#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012471 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012472#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012473 xmlFree(ctxt->sax);
12474 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12475 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012476 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012477 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012478 return(NULL);
12479 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012480 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12481 if (sax->initialized == XML_SAX2_MAGIC)
12482 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12483 else
12484 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012485 if (user_data != NULL)
12486 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012487 }
Owen Taylor3473f882001-02-23 17:55:21 +000012488
12489 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12490 if (inputStream == NULL) {
12491 xmlFreeParserCtxt(ctxt);
12492 return(NULL);
12493 }
12494 inputPush(ctxt, inputStream);
12495
12496 return(ctxt);
12497}
12498
Daniel Veillard4432df22003-09-28 18:58:27 +000012499#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012500/************************************************************************
12501 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012502 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012503 * *
12504 ************************************************************************/
12505
12506/**
12507 * xmlIOParseDTD:
12508 * @sax: the SAX handler block or NULL
12509 * @input: an Input Buffer
12510 * @enc: the charset encoding if known
12511 *
12512 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012513 *
Owen Taylor3473f882001-02-23 17:55:21 +000012514 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012515 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012516 */
12517
12518xmlDtdPtr
12519xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12520 xmlCharEncoding enc) {
12521 xmlDtdPtr ret = NULL;
12522 xmlParserCtxtPtr ctxt;
12523 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012524 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012525
12526 if (input == NULL)
12527 return(NULL);
12528
12529 ctxt = xmlNewParserCtxt();
12530 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012531 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012532 return(NULL);
12533 }
12534
12535 /*
12536 * Set-up the SAX context
12537 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012538 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012539 if (ctxt->sax != NULL)
12540 xmlFree(ctxt->sax);
12541 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012542 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012543 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012544 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012545
12546 /*
12547 * generate a parser input from the I/O handler
12548 */
12549
Daniel Veillard43caefb2003-12-07 19:32:22 +000012550 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012551 if (pinput == NULL) {
12552 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012553 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012554 xmlFreeParserCtxt(ctxt);
12555 return(NULL);
12556 }
12557
12558 /*
12559 * plug some encoding conversion routines here.
12560 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012561 if (xmlPushInput(ctxt, pinput) < 0) {
12562 if (sax != NULL) ctxt->sax = NULL;
12563 xmlFreeParserCtxt(ctxt);
12564 return(NULL);
12565 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012566 if (enc != XML_CHAR_ENCODING_NONE) {
12567 xmlSwitchEncoding(ctxt, enc);
12568 }
Owen Taylor3473f882001-02-23 17:55:21 +000012569
12570 pinput->filename = NULL;
12571 pinput->line = 1;
12572 pinput->col = 1;
12573 pinput->base = ctxt->input->cur;
12574 pinput->cur = ctxt->input->cur;
12575 pinput->free = NULL;
12576
12577 /*
12578 * let's parse that entity knowing it's an external subset.
12579 */
12580 ctxt->inSubset = 2;
12581 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012582 if (ctxt->myDoc == NULL) {
12583 xmlErrMemory(ctxt, "New Doc failed");
12584 return(NULL);
12585 }
12586 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012587 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12588 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012589
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012590 if ((enc == XML_CHAR_ENCODING_NONE) &&
12591 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012592 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012593 * Get the 4 first bytes and decode the charset
12594 * if enc != XML_CHAR_ENCODING_NONE
12595 * plug some encoding conversion routines.
12596 */
12597 start[0] = RAW;
12598 start[1] = NXT(1);
12599 start[2] = NXT(2);
12600 start[3] = NXT(3);
12601 enc = xmlDetectCharEncoding(start, 4);
12602 if (enc != XML_CHAR_ENCODING_NONE) {
12603 xmlSwitchEncoding(ctxt, enc);
12604 }
12605 }
12606
Owen Taylor3473f882001-02-23 17:55:21 +000012607 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12608
12609 if (ctxt->myDoc != NULL) {
12610 if (ctxt->wellFormed) {
12611 ret = ctxt->myDoc->extSubset;
12612 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012613 if (ret != NULL) {
12614 xmlNodePtr tmp;
12615
12616 ret->doc = NULL;
12617 tmp = ret->children;
12618 while (tmp != NULL) {
12619 tmp->doc = NULL;
12620 tmp = tmp->next;
12621 }
12622 }
Owen Taylor3473f882001-02-23 17:55:21 +000012623 } else {
12624 ret = NULL;
12625 }
12626 xmlFreeDoc(ctxt->myDoc);
12627 ctxt->myDoc = NULL;
12628 }
12629 if (sax != NULL) ctxt->sax = NULL;
12630 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012631
Owen Taylor3473f882001-02-23 17:55:21 +000012632 return(ret);
12633}
12634
12635/**
12636 * xmlSAXParseDTD:
12637 * @sax: the SAX handler block
12638 * @ExternalID: a NAME* containing the External ID of the DTD
12639 * @SystemID: a NAME* containing the URL to the DTD
12640 *
12641 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012642 *
Owen Taylor3473f882001-02-23 17:55:21 +000012643 * Returns the resulting xmlDtdPtr or NULL in case of error.
12644 */
12645
12646xmlDtdPtr
12647xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12648 const xmlChar *SystemID) {
12649 xmlDtdPtr ret = NULL;
12650 xmlParserCtxtPtr ctxt;
12651 xmlParserInputPtr input = NULL;
12652 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012653 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012654
12655 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12656
12657 ctxt = xmlNewParserCtxt();
12658 if (ctxt == NULL) {
12659 return(NULL);
12660 }
12661
12662 /*
12663 * Set-up the SAX context
12664 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012665 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012666 if (ctxt->sax != NULL)
12667 xmlFree(ctxt->sax);
12668 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012669 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012670 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012671
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012672 /*
12673 * Canonicalise the system ID
12674 */
12675 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012676 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012677 xmlFreeParserCtxt(ctxt);
12678 return(NULL);
12679 }
Owen Taylor3473f882001-02-23 17:55:21 +000012680
12681 /*
12682 * Ask the Entity resolver to load the damn thing
12683 */
12684
12685 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012686 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12687 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012688 if (input == NULL) {
12689 if (sax != NULL) ctxt->sax = NULL;
12690 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012691 if (systemIdCanonic != NULL)
12692 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012693 return(NULL);
12694 }
12695
12696 /*
12697 * plug some encoding conversion routines here.
12698 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012699 if (xmlPushInput(ctxt, input) < 0) {
12700 if (sax != NULL) ctxt->sax = NULL;
12701 xmlFreeParserCtxt(ctxt);
12702 if (systemIdCanonic != NULL)
12703 xmlFree(systemIdCanonic);
12704 return(NULL);
12705 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012706 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12707 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12708 xmlSwitchEncoding(ctxt, enc);
12709 }
Owen Taylor3473f882001-02-23 17:55:21 +000012710
12711 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012712 input->filename = (char *) systemIdCanonic;
12713 else
12714 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012715 input->line = 1;
12716 input->col = 1;
12717 input->base = ctxt->input->cur;
12718 input->cur = ctxt->input->cur;
12719 input->free = NULL;
12720
12721 /*
12722 * let's parse that entity knowing it's an external subset.
12723 */
12724 ctxt->inSubset = 2;
12725 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012726 if (ctxt->myDoc == NULL) {
12727 xmlErrMemory(ctxt, "New Doc failed");
12728 if (sax != NULL) ctxt->sax = NULL;
12729 xmlFreeParserCtxt(ctxt);
12730 return(NULL);
12731 }
12732 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012733 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12734 ExternalID, SystemID);
12735 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12736
12737 if (ctxt->myDoc != NULL) {
12738 if (ctxt->wellFormed) {
12739 ret = ctxt->myDoc->extSubset;
12740 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012741 if (ret != NULL) {
12742 xmlNodePtr tmp;
12743
12744 ret->doc = NULL;
12745 tmp = ret->children;
12746 while (tmp != NULL) {
12747 tmp->doc = NULL;
12748 tmp = tmp->next;
12749 }
12750 }
Owen Taylor3473f882001-02-23 17:55:21 +000012751 } else {
12752 ret = NULL;
12753 }
12754 xmlFreeDoc(ctxt->myDoc);
12755 ctxt->myDoc = NULL;
12756 }
12757 if (sax != NULL) ctxt->sax = NULL;
12758 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012759
Owen Taylor3473f882001-02-23 17:55:21 +000012760 return(ret);
12761}
12762
Daniel Veillard4432df22003-09-28 18:58:27 +000012763
Owen Taylor3473f882001-02-23 17:55:21 +000012764/**
12765 * xmlParseDTD:
12766 * @ExternalID: a NAME* containing the External ID of the DTD
12767 * @SystemID: a NAME* containing the URL to the DTD
12768 *
12769 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012770 *
Owen Taylor3473f882001-02-23 17:55:21 +000012771 * Returns the resulting xmlDtdPtr or NULL in case of error.
12772 */
12773
12774xmlDtdPtr
12775xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12776 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12777}
Daniel Veillard4432df22003-09-28 18:58:27 +000012778#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012779
12780/************************************************************************
12781 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012782 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012783 * *
12784 ************************************************************************/
12785
12786/**
Owen Taylor3473f882001-02-23 17:55:21 +000012787 * xmlParseCtxtExternalEntity:
12788 * @ctx: the existing parsing context
12789 * @URL: the URL for the entity to load
12790 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012791 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012792 *
12793 * Parse an external general entity within an existing parsing context
12794 * An external general parsed entity is well-formed if it matches the
12795 * production labeled extParsedEnt.
12796 *
12797 * [78] extParsedEnt ::= TextDecl? content
12798 *
12799 * Returns 0 if the entity is well formed, -1 in case of args problem and
12800 * the parser error code otherwise
12801 */
12802
12803int
12804xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012805 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012806 xmlParserCtxtPtr ctxt;
12807 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012808 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012809 xmlSAXHandlerPtr oldsax = NULL;
12810 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012811 xmlChar start[4];
12812 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012813
Daniel Veillardce682bc2004-11-05 17:22:25 +000012814 if (ctx == NULL) return(-1);
12815
Daniel Veillard0161e632008-08-28 15:36:32 +000012816 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12817 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012818 return(XML_ERR_ENTITY_LOOP);
12819 }
12820
Daniel Veillardcda96922001-08-21 10:56:31 +000012821 if (lst != NULL)
12822 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012823 if ((URL == NULL) && (ID == NULL))
12824 return(-1);
12825 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12826 return(-1);
12827
Rob Richards798743a2009-06-19 13:54:25 -040012828 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012829 if (ctxt == NULL) {
12830 return(-1);
12831 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012832
Owen Taylor3473f882001-02-23 17:55:21 +000012833 oldsax = ctxt->sax;
12834 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012835 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012836 newDoc = xmlNewDoc(BAD_CAST "1.0");
12837 if (newDoc == NULL) {
12838 xmlFreeParserCtxt(ctxt);
12839 return(-1);
12840 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012841 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012842 if (ctx->myDoc->dict) {
12843 newDoc->dict = ctx->myDoc->dict;
12844 xmlDictReference(newDoc->dict);
12845 }
Owen Taylor3473f882001-02-23 17:55:21 +000012846 if (ctx->myDoc != NULL) {
12847 newDoc->intSubset = ctx->myDoc->intSubset;
12848 newDoc->extSubset = ctx->myDoc->extSubset;
12849 }
12850 if (ctx->myDoc->URL != NULL) {
12851 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12852 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012853 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12854 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012855 ctxt->sax = oldsax;
12856 xmlFreeParserCtxt(ctxt);
12857 newDoc->intSubset = NULL;
12858 newDoc->extSubset = NULL;
12859 xmlFreeDoc(newDoc);
12860 return(-1);
12861 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012862 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012863 nodePush(ctxt, newDoc->children);
12864 if (ctx->myDoc == NULL) {
12865 ctxt->myDoc = newDoc;
12866 } else {
12867 ctxt->myDoc = ctx->myDoc;
12868 newDoc->children->doc = ctx->myDoc;
12869 }
12870
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012871 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012872 * Get the 4 first bytes and decode the charset
12873 * if enc != XML_CHAR_ENCODING_NONE
12874 * plug some encoding conversion routines.
12875 */
12876 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012877 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12878 start[0] = RAW;
12879 start[1] = NXT(1);
12880 start[2] = NXT(2);
12881 start[3] = NXT(3);
12882 enc = xmlDetectCharEncoding(start, 4);
12883 if (enc != XML_CHAR_ENCODING_NONE) {
12884 xmlSwitchEncoding(ctxt, enc);
12885 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012886 }
12887
Owen Taylor3473f882001-02-23 17:55:21 +000012888 /*
12889 * Parse a possible text declaration first
12890 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012891 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012892 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012893 /*
12894 * An XML-1.0 document can't reference an entity not XML-1.0
12895 */
12896 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12897 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012898 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012899 "Version mismatch between document and entity\n");
12900 }
Owen Taylor3473f882001-02-23 17:55:21 +000012901 }
12902
12903 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012904 * If the user provided its own SAX callbacks then reuse the
12905 * useData callback field, otherwise the expected setup in a
12906 * DOM builder is to have userData == ctxt
12907 */
12908 if (ctx->userData == ctx)
12909 ctxt->userData = ctxt;
12910 else
12911 ctxt->userData = ctx->userData;
12912
12913 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012914 * Doing validity checking on chunk doesn't make sense
12915 */
12916 ctxt->instate = XML_PARSER_CONTENT;
12917 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012918 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012919 ctxt->loadsubset = ctx->loadsubset;
12920 ctxt->depth = ctx->depth + 1;
12921 ctxt->replaceEntities = ctx->replaceEntities;
12922 if (ctxt->validate) {
12923 ctxt->vctxt.error = ctx->vctxt.error;
12924 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012925 } else {
12926 ctxt->vctxt.error = NULL;
12927 ctxt->vctxt.warning = NULL;
12928 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012929 ctxt->vctxt.nodeTab = NULL;
12930 ctxt->vctxt.nodeNr = 0;
12931 ctxt->vctxt.nodeMax = 0;
12932 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012933 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12934 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012935 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12936 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12937 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012938 ctxt->dictNames = ctx->dictNames;
12939 ctxt->attsDefault = ctx->attsDefault;
12940 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012941 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012942
12943 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012944
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012945 ctx->validate = ctxt->validate;
12946 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012947 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012948 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012949 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012950 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012951 }
12952 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012953 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012954 }
12955
12956 if (!ctxt->wellFormed) {
12957 if (ctxt->errNo == 0)
12958 ret = 1;
12959 else
12960 ret = ctxt->errNo;
12961 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012962 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012963 xmlNodePtr cur;
12964
12965 /*
12966 * Return the newly created nodeset after unlinking it from
12967 * they pseudo parent.
12968 */
12969 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012970 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012971 while (cur != NULL) {
12972 cur->parent = NULL;
12973 cur = cur->next;
12974 }
12975 newDoc->children->children = NULL;
12976 }
12977 ret = 0;
12978 }
12979 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012980 ctxt->dict = NULL;
12981 ctxt->attsDefault = NULL;
12982 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012983 xmlFreeParserCtxt(ctxt);
12984 newDoc->intSubset = NULL;
12985 newDoc->extSubset = NULL;
12986 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012987
Owen Taylor3473f882001-02-23 17:55:21 +000012988 return(ret);
12989}
12990
12991/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012992 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012993 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012994 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012995 * @sax: the SAX handler bloc (possibly NULL)
12996 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12997 * @depth: Used for loop detection, use 0
12998 * @URL: the URL for the entity to load
12999 * @ID: the System ID for the entity to load
13000 * @list: the return value for the set of parsed nodes
13001 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013002 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013003 *
13004 * Returns 0 if the entity is well formed, -1 in case of args problem and
13005 * the parser error code otherwise
13006 */
13007
Daniel Veillard7d515752003-09-26 19:12:37 +000013008static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013009xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13010 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013011 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013012 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013013 xmlParserCtxtPtr ctxt;
13014 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013015 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013016 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013017 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013018 xmlChar start[4];
13019 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013020
Daniel Veillard0161e632008-08-28 15:36:32 +000013021 if (((depth > 40) &&
13022 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13023 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013024 return(XML_ERR_ENTITY_LOOP);
13025 }
13026
Owen Taylor3473f882001-02-23 17:55:21 +000013027 if (list != NULL)
13028 *list = NULL;
13029 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013030 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013031 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013032 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013033
13034
Rob Richards9c0aa472009-03-26 18:10:19 +000013035 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013036 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013037 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013038 if (oldctxt != NULL) {
13039 ctxt->_private = oldctxt->_private;
13040 ctxt->loadsubset = oldctxt->loadsubset;
13041 ctxt->validate = oldctxt->validate;
13042 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013043 ctxt->record_info = oldctxt->record_info;
13044 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13045 ctxt->node_seq.length = oldctxt->node_seq.length;
13046 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013047 } else {
13048 /*
13049 * Doing validity checking on chunk without context
13050 * doesn't make sense
13051 */
13052 ctxt->_private = NULL;
13053 ctxt->validate = 0;
13054 ctxt->external = 2;
13055 ctxt->loadsubset = 0;
13056 }
Owen Taylor3473f882001-02-23 17:55:21 +000013057 if (sax != NULL) {
13058 oldsax = ctxt->sax;
13059 ctxt->sax = sax;
13060 if (user_data != NULL)
13061 ctxt->userData = user_data;
13062 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013063 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013064 newDoc = xmlNewDoc(BAD_CAST "1.0");
13065 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013066 ctxt->node_seq.maximum = 0;
13067 ctxt->node_seq.length = 0;
13068 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013069 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013070 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013071 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013072 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013073 newDoc->intSubset = doc->intSubset;
13074 newDoc->extSubset = doc->extSubset;
13075 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013076 xmlDictReference(newDoc->dict);
13077
Owen Taylor3473f882001-02-23 17:55:21 +000013078 if (doc->URL != NULL) {
13079 newDoc->URL = xmlStrdup(doc->URL);
13080 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013081 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13082 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013083 if (sax != NULL)
13084 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013085 ctxt->node_seq.maximum = 0;
13086 ctxt->node_seq.length = 0;
13087 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013088 xmlFreeParserCtxt(ctxt);
13089 newDoc->intSubset = NULL;
13090 newDoc->extSubset = NULL;
13091 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013092 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013093 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013094 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013095 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013096 ctxt->myDoc = doc;
13097 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013098
Daniel Veillard0161e632008-08-28 15:36:32 +000013099 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013100 * Get the 4 first bytes and decode the charset
13101 * if enc != XML_CHAR_ENCODING_NONE
13102 * plug some encoding conversion routines.
13103 */
13104 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013105 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13106 start[0] = RAW;
13107 start[1] = NXT(1);
13108 start[2] = NXT(2);
13109 start[3] = NXT(3);
13110 enc = xmlDetectCharEncoding(start, 4);
13111 if (enc != XML_CHAR_ENCODING_NONE) {
13112 xmlSwitchEncoding(ctxt, enc);
13113 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013114 }
13115
Owen Taylor3473f882001-02-23 17:55:21 +000013116 /*
13117 * Parse a possible text declaration first
13118 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013119 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013120 xmlParseTextDecl(ctxt);
13121 }
13122
Owen Taylor3473f882001-02-23 17:55:21 +000013123 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013124 ctxt->depth = depth;
13125
13126 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013127
Daniel Veillard561b7f82002-03-20 21:55:57 +000013128 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013129 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013130 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013131 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013132 }
13133 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013134 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013135 }
13136
13137 if (!ctxt->wellFormed) {
13138 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013139 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013140 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013141 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013142 } else {
13143 if (list != NULL) {
13144 xmlNodePtr cur;
13145
13146 /*
13147 * Return the newly created nodeset after unlinking it from
13148 * they pseudo parent.
13149 */
13150 cur = newDoc->children->children;
13151 *list = cur;
13152 while (cur != NULL) {
13153 cur->parent = NULL;
13154 cur = cur->next;
13155 }
13156 newDoc->children->children = NULL;
13157 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013158 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013159 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013160
13161 /*
13162 * Record in the parent context the number of entities replacement
13163 * done when parsing that reference.
13164 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013165 if (oldctxt != NULL)
13166 oldctxt->nbentities += ctxt->nbentities;
13167
Daniel Veillard0161e632008-08-28 15:36:32 +000013168 /*
13169 * Also record the size of the entity parsed
13170 */
13171 if (ctxt->input != NULL) {
13172 oldctxt->sizeentities += ctxt->input->consumed;
13173 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13174 }
13175 /*
13176 * And record the last error if any
13177 */
13178 if (ctxt->lastError.code != XML_ERR_OK)
13179 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13180
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013181 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013182 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013183 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13184 oldctxt->node_seq.length = ctxt->node_seq.length;
13185 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013186 ctxt->node_seq.maximum = 0;
13187 ctxt->node_seq.length = 0;
13188 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013189 xmlFreeParserCtxt(ctxt);
13190 newDoc->intSubset = NULL;
13191 newDoc->extSubset = NULL;
13192 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013193
Owen Taylor3473f882001-02-23 17:55:21 +000013194 return(ret);
13195}
13196
Daniel Veillard81273902003-09-30 00:43:48 +000013197#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013198/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013199 * xmlParseExternalEntity:
13200 * @doc: the document the chunk pertains to
13201 * @sax: the SAX handler bloc (possibly NULL)
13202 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13203 * @depth: Used for loop detection, use 0
13204 * @URL: the URL for the entity to load
13205 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013206 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013207 *
13208 * Parse an external general entity
13209 * An external general parsed entity is well-formed if it matches the
13210 * production labeled extParsedEnt.
13211 *
13212 * [78] extParsedEnt ::= TextDecl? content
13213 *
13214 * Returns 0 if the entity is well formed, -1 in case of args problem and
13215 * the parser error code otherwise
13216 */
13217
13218int
13219xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013220 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013221 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013222 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013223}
13224
13225/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013226 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013227 * @doc: the document the chunk pertains to
13228 * @sax: the SAX handler bloc (possibly NULL)
13229 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13230 * @depth: Used for loop detection, use 0
13231 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013232 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013233 *
13234 * Parse a well-balanced chunk of an XML document
13235 * called by the parser
13236 * The allowed sequence for the Well Balanced Chunk is the one defined by
13237 * the content production in the XML grammar:
13238 *
13239 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13240 *
13241 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13242 * the parser error code otherwise
13243 */
13244
13245int
13246xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013247 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013248 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13249 depth, string, lst, 0 );
13250}
Daniel Veillard81273902003-09-30 00:43:48 +000013251#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013252
13253/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013254 * xmlParseBalancedChunkMemoryInternal:
13255 * @oldctxt: the existing parsing context
13256 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13257 * @user_data: the user data field for the parser context
13258 * @lst: the return value for the set of parsed nodes
13259 *
13260 *
13261 * Parse a well-balanced chunk of an XML document
13262 * called by the parser
13263 * The allowed sequence for the Well Balanced Chunk is the one defined by
13264 * the content production in the XML grammar:
13265 *
13266 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13267 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013268 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13269 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013270 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013271 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013272 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013273 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013274static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013275xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13276 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13277 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013278 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013279 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013280 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013281 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013282 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013283 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013284 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013285#ifdef SAX2
13286 int i;
13287#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013288
Daniel Veillard0161e632008-08-28 15:36:32 +000013289 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13290 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013291 return(XML_ERR_ENTITY_LOOP);
13292 }
13293
13294
13295 if (lst != NULL)
13296 *lst = NULL;
13297 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013298 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013299
13300 size = xmlStrlen(string);
13301
13302 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013303 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013304 if (user_data != NULL)
13305 ctxt->userData = user_data;
13306 else
13307 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013308 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13309 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013310 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13311 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13312 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013313
Daniel Veillard74eaec12009-08-26 15:57:20 +020013314#ifdef SAX2
13315 /* propagate namespaces down the entity */
13316 for (i = 0;i < oldctxt->nsNr;i += 2) {
13317 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13318 }
13319#endif
13320
Daniel Veillard328f48c2002-11-15 15:24:34 +000013321 oldsax = ctxt->sax;
13322 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013323 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013324 ctxt->replaceEntities = oldctxt->replaceEntities;
13325 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013326
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013327 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013328 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013329 newDoc = xmlNewDoc(BAD_CAST "1.0");
13330 if (newDoc == NULL) {
13331 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013332 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013333 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013334 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013335 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013336 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013337 newDoc->dict = ctxt->dict;
13338 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013339 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013340 } else {
13341 ctxt->myDoc = oldctxt->myDoc;
13342 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013343 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013344 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013345 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13346 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013347 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013348 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013349 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013350 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013351 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013352 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013353 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013354 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013355 ctxt->myDoc->children = NULL;
13356 ctxt->myDoc->last = NULL;
13357 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013358 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013359 ctxt->instate = XML_PARSER_CONTENT;
13360 ctxt->depth = oldctxt->depth + 1;
13361
Daniel Veillard328f48c2002-11-15 15:24:34 +000013362 ctxt->validate = 0;
13363 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013364 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13365 /*
13366 * ID/IDREF registration will be done in xmlValidateElement below
13367 */
13368 ctxt->loadsubset |= XML_SKIP_IDS;
13369 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013370 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013371 ctxt->attsDefault = oldctxt->attsDefault;
13372 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013373
Daniel Veillard68e9e742002-11-16 15:35:11 +000013374 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013375 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013376 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013377 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013378 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013379 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013380 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013381 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013382 }
13383
13384 if (!ctxt->wellFormed) {
13385 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013386 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013387 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013388 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013389 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013390 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013391 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013392
William M. Brack7b9154b2003-09-27 19:23:50 +000013393 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013394 xmlNodePtr cur;
13395
13396 /*
13397 * Return the newly created nodeset after unlinking it from
13398 * they pseudo parent.
13399 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013400 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013401 *lst = cur;
13402 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013403#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013404 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13405 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13406 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013407 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13408 oldctxt->myDoc, cur);
13409 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013410#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013411 cur->parent = NULL;
13412 cur = cur->next;
13413 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013414 ctxt->myDoc->children->children = NULL;
13415 }
13416 if (ctxt->myDoc != NULL) {
13417 xmlFreeNode(ctxt->myDoc->children);
13418 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013419 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013420 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013421
13422 /*
13423 * Record in the parent context the number of entities replacement
13424 * done when parsing that reference.
13425 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013426 if (oldctxt != NULL)
13427 oldctxt->nbentities += ctxt->nbentities;
13428
Daniel Veillard0161e632008-08-28 15:36:32 +000013429 /*
13430 * Also record the last error if any
13431 */
13432 if (ctxt->lastError.code != XML_ERR_OK)
13433 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13434
Daniel Veillard328f48c2002-11-15 15:24:34 +000013435 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013436 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013437 ctxt->attsDefault = NULL;
13438 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013439 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013440 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013441 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013442 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013443
Daniel Veillard328f48c2002-11-15 15:24:34 +000013444 return(ret);
13445}
13446
Daniel Veillard29b17482004-08-16 00:39:03 +000013447/**
13448 * xmlParseInNodeContext:
13449 * @node: the context node
13450 * @data: the input string
13451 * @datalen: the input string length in bytes
13452 * @options: a combination of xmlParserOption
13453 * @lst: the return value for the set of parsed nodes
13454 *
13455 * Parse a well-balanced chunk of an XML document
13456 * within the context (DTD, namespaces, etc ...) of the given node.
13457 *
13458 * The allowed sequence for the data is a Well Balanced Chunk defined by
13459 * the content production in the XML grammar:
13460 *
13461 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13462 *
13463 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13464 * error code otherwise
13465 */
13466xmlParserErrors
13467xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13468 int options, xmlNodePtr *lst) {
13469#ifdef SAX2
13470 xmlParserCtxtPtr ctxt;
13471 xmlDocPtr doc = NULL;
13472 xmlNodePtr fake, cur;
13473 int nsnr = 0;
13474
13475 xmlParserErrors ret = XML_ERR_OK;
13476
13477 /*
13478 * check all input parameters, grab the document
13479 */
13480 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13481 return(XML_ERR_INTERNAL_ERROR);
13482 switch (node->type) {
13483 case XML_ELEMENT_NODE:
13484 case XML_ATTRIBUTE_NODE:
13485 case XML_TEXT_NODE:
13486 case XML_CDATA_SECTION_NODE:
13487 case XML_ENTITY_REF_NODE:
13488 case XML_PI_NODE:
13489 case XML_COMMENT_NODE:
13490 case XML_DOCUMENT_NODE:
13491 case XML_HTML_DOCUMENT_NODE:
13492 break;
13493 default:
13494 return(XML_ERR_INTERNAL_ERROR);
13495
13496 }
13497 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13498 (node->type != XML_DOCUMENT_NODE) &&
13499 (node->type != XML_HTML_DOCUMENT_NODE))
13500 node = node->parent;
13501 if (node == NULL)
13502 return(XML_ERR_INTERNAL_ERROR);
13503 if (node->type == XML_ELEMENT_NODE)
13504 doc = node->doc;
13505 else
13506 doc = (xmlDocPtr) node;
13507 if (doc == NULL)
13508 return(XML_ERR_INTERNAL_ERROR);
13509
13510 /*
13511 * allocate a context and set-up everything not related to the
13512 * node position in the tree
13513 */
13514 if (doc->type == XML_DOCUMENT_NODE)
13515 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13516#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013517 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013518 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013519 /*
13520 * When parsing in context, it makes no sense to add implied
13521 * elements like html/body/etc...
13522 */
13523 options |= HTML_PARSE_NOIMPLIED;
13524 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013525#endif
13526 else
13527 return(XML_ERR_INTERNAL_ERROR);
13528
13529 if (ctxt == NULL)
13530 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013531
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013532 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013533 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13534 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13535 * we must wait until the last moment to free the original one.
13536 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013537 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013538 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013539 xmlDictFree(ctxt->dict);
13540 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013541 } else
13542 options |= XML_PARSE_NODICT;
13543
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013544 if (doc->encoding != NULL) {
13545 xmlCharEncodingHandlerPtr hdlr;
13546
13547 if (ctxt->encoding != NULL)
13548 xmlFree((xmlChar *) ctxt->encoding);
13549 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13550
13551 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13552 if (hdlr != NULL) {
13553 xmlSwitchToEncoding(ctxt, hdlr);
13554 } else {
13555 return(XML_ERR_UNSUPPORTED_ENCODING);
13556 }
13557 }
13558
Daniel Veillard37334572008-07-31 08:20:02 +000013559 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013560 xmlDetectSAX2(ctxt);
13561 ctxt->myDoc = doc;
13562
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013563 fake = xmlNewComment(NULL);
13564 if (fake == NULL) {
13565 xmlFreeParserCtxt(ctxt);
13566 return(XML_ERR_NO_MEMORY);
13567 }
13568 xmlAddChild(node, fake);
13569
Daniel Veillard29b17482004-08-16 00:39:03 +000013570 if (node->type == XML_ELEMENT_NODE) {
13571 nodePush(ctxt, node);
13572 /*
13573 * initialize the SAX2 namespaces stack
13574 */
13575 cur = node;
13576 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13577 xmlNsPtr ns = cur->nsDef;
13578 const xmlChar *iprefix, *ihref;
13579
13580 while (ns != NULL) {
13581 if (ctxt->dict) {
13582 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13583 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13584 } else {
13585 iprefix = ns->prefix;
13586 ihref = ns->href;
13587 }
13588
13589 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13590 nsPush(ctxt, iprefix, ihref);
13591 nsnr++;
13592 }
13593 ns = ns->next;
13594 }
13595 cur = cur->parent;
13596 }
13597 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013598 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013599
13600 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13601 /*
13602 * ID/IDREF registration will be done in xmlValidateElement below
13603 */
13604 ctxt->loadsubset |= XML_SKIP_IDS;
13605 }
13606
Daniel Veillard499cc922006-01-18 17:22:35 +000013607#ifdef LIBXML_HTML_ENABLED
13608 if (doc->type == XML_HTML_DOCUMENT_NODE)
13609 __htmlParseContent(ctxt);
13610 else
13611#endif
13612 xmlParseContent(ctxt);
13613
Daniel Veillard29b17482004-08-16 00:39:03 +000013614 nsPop(ctxt, nsnr);
13615 if ((RAW == '<') && (NXT(1) == '/')) {
13616 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13617 } else if (RAW != 0) {
13618 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13619 }
13620 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13621 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13622 ctxt->wellFormed = 0;
13623 }
13624
13625 if (!ctxt->wellFormed) {
13626 if (ctxt->errNo == 0)
13627 ret = XML_ERR_INTERNAL_ERROR;
13628 else
13629 ret = (xmlParserErrors)ctxt->errNo;
13630 } else {
13631 ret = XML_ERR_OK;
13632 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013633
Daniel Veillard29b17482004-08-16 00:39:03 +000013634 /*
13635 * Return the newly created nodeset after unlinking it from
13636 * the pseudo sibling.
13637 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013638
Daniel Veillard29b17482004-08-16 00:39:03 +000013639 cur = fake->next;
13640 fake->next = NULL;
13641 node->last = fake;
13642
13643 if (cur != NULL) {
13644 cur->prev = NULL;
13645 }
13646
13647 *lst = cur;
13648
13649 while (cur != NULL) {
13650 cur->parent = NULL;
13651 cur = cur->next;
13652 }
13653
13654 xmlUnlinkNode(fake);
13655 xmlFreeNode(fake);
13656
13657
13658 if (ret != XML_ERR_OK) {
13659 xmlFreeNodeList(*lst);
13660 *lst = NULL;
13661 }
William M. Brackc3f81342004-10-03 01:22:44 +000013662
William M. Brackb7b54de2004-10-06 16:38:01 +000013663 if (doc->dict != NULL)
13664 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013665 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013666
Daniel Veillard29b17482004-08-16 00:39:03 +000013667 return(ret);
13668#else /* !SAX2 */
13669 return(XML_ERR_INTERNAL_ERROR);
13670#endif
13671}
13672
Daniel Veillard81273902003-09-30 00:43:48 +000013673#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013674/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013675 * xmlParseBalancedChunkMemoryRecover:
13676 * @doc: the document the chunk pertains to
13677 * @sax: the SAX handler bloc (possibly NULL)
13678 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13679 * @depth: Used for loop detection, use 0
13680 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13681 * @lst: the return value for the set of parsed nodes
13682 * @recover: return nodes even if the data is broken (use 0)
13683 *
13684 *
13685 * Parse a well-balanced chunk of an XML document
13686 * called by the parser
13687 * The allowed sequence for the Well Balanced Chunk is the one defined by
13688 * the content production in the XML grammar:
13689 *
13690 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13691 *
13692 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13693 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013694 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013695 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013696 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13697 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013698 */
13699int
13700xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013701 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013702 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013703 xmlParserCtxtPtr ctxt;
13704 xmlDocPtr newDoc;
13705 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013706 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013707 int size;
13708 int ret = 0;
13709
Daniel Veillard0161e632008-08-28 15:36:32 +000013710 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013711 return(XML_ERR_ENTITY_LOOP);
13712 }
13713
13714
Daniel Veillardcda96922001-08-21 10:56:31 +000013715 if (lst != NULL)
13716 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013717 if (string == NULL)
13718 return(-1);
13719
13720 size = xmlStrlen(string);
13721
13722 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13723 if (ctxt == NULL) return(-1);
13724 ctxt->userData = ctxt;
13725 if (sax != NULL) {
13726 oldsax = ctxt->sax;
13727 ctxt->sax = sax;
13728 if (user_data != NULL)
13729 ctxt->userData = user_data;
13730 }
13731 newDoc = xmlNewDoc(BAD_CAST "1.0");
13732 if (newDoc == NULL) {
13733 xmlFreeParserCtxt(ctxt);
13734 return(-1);
13735 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013736 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013737 if ((doc != NULL) && (doc->dict != NULL)) {
13738 xmlDictFree(ctxt->dict);
13739 ctxt->dict = doc->dict;
13740 xmlDictReference(ctxt->dict);
13741 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13742 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13743 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13744 ctxt->dictNames = 1;
13745 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013746 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013747 }
Owen Taylor3473f882001-02-23 17:55:21 +000013748 if (doc != NULL) {
13749 newDoc->intSubset = doc->intSubset;
13750 newDoc->extSubset = doc->extSubset;
13751 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013752 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13753 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013754 if (sax != NULL)
13755 ctxt->sax = oldsax;
13756 xmlFreeParserCtxt(ctxt);
13757 newDoc->intSubset = NULL;
13758 newDoc->extSubset = NULL;
13759 xmlFreeDoc(newDoc);
13760 return(-1);
13761 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013762 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13763 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013764 if (doc == NULL) {
13765 ctxt->myDoc = newDoc;
13766 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013767 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013768 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013769 /* Ensure that doc has XML spec namespace */
13770 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13771 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013772 }
13773 ctxt->instate = XML_PARSER_CONTENT;
13774 ctxt->depth = depth;
13775
13776 /*
13777 * Doing validity checking on chunk doesn't make sense
13778 */
13779 ctxt->validate = 0;
13780 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013781 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013782
Daniel Veillardb39bc392002-10-26 19:29:51 +000013783 if ( doc != NULL ){
13784 content = doc->children;
13785 doc->children = NULL;
13786 xmlParseContent(ctxt);
13787 doc->children = content;
13788 }
13789 else {
13790 xmlParseContent(ctxt);
13791 }
Owen Taylor3473f882001-02-23 17:55:21 +000013792 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013793 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013794 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013795 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013796 }
13797 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013798 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013799 }
13800
13801 if (!ctxt->wellFormed) {
13802 if (ctxt->errNo == 0)
13803 ret = 1;
13804 else
13805 ret = ctxt->errNo;
13806 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013807 ret = 0;
13808 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013809
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013810 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13811 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013812
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013813 /*
13814 * Return the newly created nodeset after unlinking it from
13815 * they pseudo parent.
13816 */
13817 cur = newDoc->children->children;
13818 *lst = cur;
13819 while (cur != NULL) {
13820 xmlSetTreeDoc(cur, doc);
13821 cur->parent = NULL;
13822 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013823 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013824 newDoc->children->children = NULL;
13825 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013826
13827 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013828 ctxt->sax = oldsax;
13829 xmlFreeParserCtxt(ctxt);
13830 newDoc->intSubset = NULL;
13831 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013832 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013833 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013834
Owen Taylor3473f882001-02-23 17:55:21 +000013835 return(ret);
13836}
13837
13838/**
13839 * xmlSAXParseEntity:
13840 * @sax: the SAX handler block
13841 * @filename: the filename
13842 *
13843 * parse an XML external entity out of context and build a tree.
13844 * It use the given SAX function block to handle the parsing callback.
13845 * If sax is NULL, fallback to the default DOM tree building routines.
13846 *
13847 * [78] extParsedEnt ::= TextDecl? content
13848 *
13849 * This correspond to a "Well Balanced" chunk
13850 *
13851 * Returns the resulting document tree
13852 */
13853
13854xmlDocPtr
13855xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13856 xmlDocPtr ret;
13857 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013858
13859 ctxt = xmlCreateFileParserCtxt(filename);
13860 if (ctxt == NULL) {
13861 return(NULL);
13862 }
13863 if (sax != NULL) {
13864 if (ctxt->sax != NULL)
13865 xmlFree(ctxt->sax);
13866 ctxt->sax = sax;
13867 ctxt->userData = NULL;
13868 }
13869
Owen Taylor3473f882001-02-23 17:55:21 +000013870 xmlParseExtParsedEnt(ctxt);
13871
13872 if (ctxt->wellFormed)
13873 ret = ctxt->myDoc;
13874 else {
13875 ret = NULL;
13876 xmlFreeDoc(ctxt->myDoc);
13877 ctxt->myDoc = NULL;
13878 }
13879 if (sax != NULL)
13880 ctxt->sax = NULL;
13881 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013882
Owen Taylor3473f882001-02-23 17:55:21 +000013883 return(ret);
13884}
13885
13886/**
13887 * xmlParseEntity:
13888 * @filename: the filename
13889 *
13890 * parse an XML external entity out of context and build a tree.
13891 *
13892 * [78] extParsedEnt ::= TextDecl? content
13893 *
13894 * This correspond to a "Well Balanced" chunk
13895 *
13896 * Returns the resulting document tree
13897 */
13898
13899xmlDocPtr
13900xmlParseEntity(const char *filename) {
13901 return(xmlSAXParseEntity(NULL, filename));
13902}
Daniel Veillard81273902003-09-30 00:43:48 +000013903#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013904
13905/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013906 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013907 * @URL: the entity URL
13908 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013909 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013910 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013911 *
13912 * Create a parser context for an external entity
13913 * Automatic support for ZLIB/Compress compressed document is provided
13914 * by default if found at compile-time.
13915 *
13916 * Returns the new parser context or NULL
13917 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013918static xmlParserCtxtPtr
13919xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13920 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013921 xmlParserCtxtPtr ctxt;
13922 xmlParserInputPtr inputStream;
13923 char *directory = NULL;
13924 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013925
Owen Taylor3473f882001-02-23 17:55:21 +000013926 ctxt = xmlNewParserCtxt();
13927 if (ctxt == NULL) {
13928 return(NULL);
13929 }
13930
Daniel Veillard48247b42009-07-10 16:12:46 +020013931 if (pctx != NULL) {
13932 ctxt->options = pctx->options;
13933 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013934 }
13935
Owen Taylor3473f882001-02-23 17:55:21 +000013936 uri = xmlBuildURI(URL, base);
13937
13938 if (uri == NULL) {
13939 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13940 if (inputStream == NULL) {
13941 xmlFreeParserCtxt(ctxt);
13942 return(NULL);
13943 }
13944
13945 inputPush(ctxt, inputStream);
13946
13947 if ((ctxt->directory == NULL) && (directory == NULL))
13948 directory = xmlParserGetDirectory((char *)URL);
13949 if ((ctxt->directory == NULL) && (directory != NULL))
13950 ctxt->directory = directory;
13951 } else {
13952 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13953 if (inputStream == NULL) {
13954 xmlFree(uri);
13955 xmlFreeParserCtxt(ctxt);
13956 return(NULL);
13957 }
13958
13959 inputPush(ctxt, inputStream);
13960
13961 if ((ctxt->directory == NULL) && (directory == NULL))
13962 directory = xmlParserGetDirectory((char *)uri);
13963 if ((ctxt->directory == NULL) && (directory != NULL))
13964 ctxt->directory = directory;
13965 xmlFree(uri);
13966 }
Owen Taylor3473f882001-02-23 17:55:21 +000013967 return(ctxt);
13968}
13969
Rob Richards9c0aa472009-03-26 18:10:19 +000013970/**
13971 * xmlCreateEntityParserCtxt:
13972 * @URL: the entity URL
13973 * @ID: the entity PUBLIC ID
13974 * @base: a possible base for the target URI
13975 *
13976 * Create a parser context for an external entity
13977 * Automatic support for ZLIB/Compress compressed document is provided
13978 * by default if found at compile-time.
13979 *
13980 * Returns the new parser context or NULL
13981 */
13982xmlParserCtxtPtr
13983xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13984 const xmlChar *base) {
13985 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13986
13987}
13988
Owen Taylor3473f882001-02-23 17:55:21 +000013989/************************************************************************
13990 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013991 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013992 * *
13993 ************************************************************************/
13994
13995/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013996 * xmlCreateURLParserCtxt:
13997 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013998 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013999 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014000 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014001 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014002 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014003 *
14004 * Returns the new parser context or NULL
14005 */
14006xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014007xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014008{
14009 xmlParserCtxtPtr ctxt;
14010 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014011 char *directory = NULL;
14012
Owen Taylor3473f882001-02-23 17:55:21 +000014013 ctxt = xmlNewParserCtxt();
14014 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014015 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014016 return(NULL);
14017 }
14018
Daniel Veillarddf292f72005-01-16 19:00:15 +000014019 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014020 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014021 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014022
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014023 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014024 if (inputStream == NULL) {
14025 xmlFreeParserCtxt(ctxt);
14026 return(NULL);
14027 }
14028
Owen Taylor3473f882001-02-23 17:55:21 +000014029 inputPush(ctxt, inputStream);
14030 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014031 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014032 if ((ctxt->directory == NULL) && (directory != NULL))
14033 ctxt->directory = directory;
14034
14035 return(ctxt);
14036}
14037
Daniel Veillard61b93382003-11-03 14:28:31 +000014038/**
14039 * xmlCreateFileParserCtxt:
14040 * @filename: the filename
14041 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014042 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014043 * Automatic support for ZLIB/Compress compressed document is provided
14044 * by default if found at compile-time.
14045 *
14046 * Returns the new parser context or NULL
14047 */
14048xmlParserCtxtPtr
14049xmlCreateFileParserCtxt(const char *filename)
14050{
14051 return(xmlCreateURLParserCtxt(filename, 0));
14052}
14053
Daniel Veillard81273902003-09-30 00:43:48 +000014054#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014055/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014056 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014057 * @sax: the SAX handler block
14058 * @filename: the filename
14059 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14060 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014061 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014062 *
14063 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14064 * compressed document is provided by default if found at compile-time.
14065 * It use the given SAX function block to handle the parsing callback.
14066 * If sax is NULL, fallback to the default DOM tree building routines.
14067 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014068 * User data (void *) is stored within the parser context in the
14069 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014070 *
Owen Taylor3473f882001-02-23 17:55:21 +000014071 * Returns the resulting document tree
14072 */
14073
14074xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014075xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14076 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014077 xmlDocPtr ret;
14078 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014079
Daniel Veillard635ef722001-10-29 11:48:19 +000014080 xmlInitParser();
14081
Owen Taylor3473f882001-02-23 17:55:21 +000014082 ctxt = xmlCreateFileParserCtxt(filename);
14083 if (ctxt == NULL) {
14084 return(NULL);
14085 }
14086 if (sax != NULL) {
14087 if (ctxt->sax != NULL)
14088 xmlFree(ctxt->sax);
14089 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014090 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014091 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014092 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014093 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014094 }
Owen Taylor3473f882001-02-23 17:55:21 +000014095
Daniel Veillard37d2d162008-03-14 10:54:00 +000014096 if (ctxt->directory == NULL)
14097 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014098
Daniel Veillarddad3f682002-11-17 16:47:27 +000014099 ctxt->recovery = recovery;
14100
Owen Taylor3473f882001-02-23 17:55:21 +000014101 xmlParseDocument(ctxt);
14102
William M. Brackc07329e2003-09-08 01:57:30 +000014103 if ((ctxt->wellFormed) || recovery) {
14104 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014105 if (ret != NULL) {
14106 if (ctxt->input->buf->compressed > 0)
14107 ret->compression = 9;
14108 else
14109 ret->compression = ctxt->input->buf->compressed;
14110 }
William M. Brackc07329e2003-09-08 01:57:30 +000014111 }
Owen Taylor3473f882001-02-23 17:55:21 +000014112 else {
14113 ret = NULL;
14114 xmlFreeDoc(ctxt->myDoc);
14115 ctxt->myDoc = NULL;
14116 }
14117 if (sax != NULL)
14118 ctxt->sax = NULL;
14119 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014120
Owen Taylor3473f882001-02-23 17:55:21 +000014121 return(ret);
14122}
14123
14124/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014125 * xmlSAXParseFile:
14126 * @sax: the SAX handler block
14127 * @filename: the filename
14128 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14129 * documents
14130 *
14131 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14132 * compressed document is provided by default if found at compile-time.
14133 * It use the given SAX function block to handle the parsing callback.
14134 * If sax is NULL, fallback to the default DOM tree building routines.
14135 *
14136 * Returns the resulting document tree
14137 */
14138
14139xmlDocPtr
14140xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14141 int recovery) {
14142 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14143}
14144
14145/**
Owen Taylor3473f882001-02-23 17:55:21 +000014146 * xmlRecoverDoc:
14147 * @cur: a pointer to an array of xmlChar
14148 *
14149 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014150 * In the case the document is not Well Formed, a attempt to build a
14151 * tree is tried anyway
14152 *
14153 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014154 */
14155
14156xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014157xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014158 return(xmlSAXParseDoc(NULL, cur, 1));
14159}
14160
14161/**
14162 * xmlParseFile:
14163 * @filename: the filename
14164 *
14165 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14166 * compressed document is provided by default if found at compile-time.
14167 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014168 * Returns the resulting document tree if the file was wellformed,
14169 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014170 */
14171
14172xmlDocPtr
14173xmlParseFile(const char *filename) {
14174 return(xmlSAXParseFile(NULL, filename, 0));
14175}
14176
14177/**
14178 * xmlRecoverFile:
14179 * @filename: the filename
14180 *
14181 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14182 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014183 * In the case the document is not Well Formed, it attempts to build
14184 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014185 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014186 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014187 */
14188
14189xmlDocPtr
14190xmlRecoverFile(const char *filename) {
14191 return(xmlSAXParseFile(NULL, filename, 1));
14192}
14193
14194
14195/**
14196 * xmlSetupParserForBuffer:
14197 * @ctxt: an XML parser context
14198 * @buffer: a xmlChar * buffer
14199 * @filename: a file name
14200 *
14201 * Setup the parser context to parse a new buffer; Clears any prior
14202 * contents from the parser context. The buffer parameter must not be
14203 * NULL, but the filename parameter can be
14204 */
14205void
14206xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14207 const char* filename)
14208{
14209 xmlParserInputPtr input;
14210
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014211 if ((ctxt == NULL) || (buffer == NULL))
14212 return;
14213
Owen Taylor3473f882001-02-23 17:55:21 +000014214 input = xmlNewInputStream(ctxt);
14215 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014216 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014217 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014218 return;
14219 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014220
Owen Taylor3473f882001-02-23 17:55:21 +000014221 xmlClearParserCtxt(ctxt);
14222 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014223 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014224 input->base = buffer;
14225 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014226 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014227 inputPush(ctxt, input);
14228}
14229
14230/**
14231 * xmlSAXUserParseFile:
14232 * @sax: a SAX handler
14233 * @user_data: The user data returned on SAX callbacks
14234 * @filename: a file name
14235 *
14236 * parse an XML file and call the given SAX handler routines.
14237 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014238 *
Owen Taylor3473f882001-02-23 17:55:21 +000014239 * Returns 0 in case of success or a error number otherwise
14240 */
14241int
14242xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14243 const char *filename) {
14244 int ret = 0;
14245 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014246
Owen Taylor3473f882001-02-23 17:55:21 +000014247 ctxt = xmlCreateFileParserCtxt(filename);
14248 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014249 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014250 xmlFree(ctxt->sax);
14251 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014252 xmlDetectSAX2(ctxt);
14253
Owen Taylor3473f882001-02-23 17:55:21 +000014254 if (user_data != NULL)
14255 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014256
Owen Taylor3473f882001-02-23 17:55:21 +000014257 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014258
Owen Taylor3473f882001-02-23 17:55:21 +000014259 if (ctxt->wellFormed)
14260 ret = 0;
14261 else {
14262 if (ctxt->errNo != 0)
14263 ret = ctxt->errNo;
14264 else
14265 ret = -1;
14266 }
14267 if (sax != NULL)
14268 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014269 if (ctxt->myDoc != NULL) {
14270 xmlFreeDoc(ctxt->myDoc);
14271 ctxt->myDoc = NULL;
14272 }
Owen Taylor3473f882001-02-23 17:55:21 +000014273 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014274
Owen Taylor3473f882001-02-23 17:55:21 +000014275 return ret;
14276}
Daniel Veillard81273902003-09-30 00:43:48 +000014277#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014278
14279/************************************************************************
14280 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014281 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014282 * *
14283 ************************************************************************/
14284
14285/**
14286 * xmlCreateMemoryParserCtxt:
14287 * @buffer: a pointer to a char array
14288 * @size: the size of the array
14289 *
14290 * Create a parser context for an XML in-memory document.
14291 *
14292 * Returns the new parser context or NULL
14293 */
14294xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014295xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014296 xmlParserCtxtPtr ctxt;
14297 xmlParserInputPtr input;
14298 xmlParserInputBufferPtr buf;
14299
14300 if (buffer == NULL)
14301 return(NULL);
14302 if (size <= 0)
14303 return(NULL);
14304
14305 ctxt = xmlNewParserCtxt();
14306 if (ctxt == NULL)
14307 return(NULL);
14308
Daniel Veillard53350552003-09-18 13:35:51 +000014309 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014310 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014311 if (buf == NULL) {
14312 xmlFreeParserCtxt(ctxt);
14313 return(NULL);
14314 }
Owen Taylor3473f882001-02-23 17:55:21 +000014315
14316 input = xmlNewInputStream(ctxt);
14317 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014318 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014319 xmlFreeParserCtxt(ctxt);
14320 return(NULL);
14321 }
14322
14323 input->filename = NULL;
14324 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014325 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014326
14327 inputPush(ctxt, input);
14328 return(ctxt);
14329}
14330
Daniel Veillard81273902003-09-30 00:43:48 +000014331#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014332/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014333 * xmlSAXParseMemoryWithData:
14334 * @sax: the SAX handler block
14335 * @buffer: an pointer to a char array
14336 * @size: the size of the array
14337 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14338 * documents
14339 * @data: the userdata
14340 *
14341 * parse an XML in-memory block and use the given SAX function block
14342 * to handle the parsing callback. If sax is NULL, fallback to the default
14343 * DOM tree building routines.
14344 *
14345 * User data (void *) is stored within the parser context in the
14346 * context's _private member, so it is available nearly everywhere in libxml
14347 *
14348 * Returns the resulting document tree
14349 */
14350
14351xmlDocPtr
14352xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14353 int size, int recovery, void *data) {
14354 xmlDocPtr ret;
14355 xmlParserCtxtPtr ctxt;
14356
Daniel Veillardab2a7632009-07-09 08:45:03 +020014357 xmlInitParser();
14358
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014359 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14360 if (ctxt == NULL) return(NULL);
14361 if (sax != NULL) {
14362 if (ctxt->sax != NULL)
14363 xmlFree(ctxt->sax);
14364 ctxt->sax = sax;
14365 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014366 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014367 if (data!=NULL) {
14368 ctxt->_private=data;
14369 }
14370
Daniel Veillardadba5f12003-04-04 16:09:01 +000014371 ctxt->recovery = recovery;
14372
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014373 xmlParseDocument(ctxt);
14374
14375 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14376 else {
14377 ret = NULL;
14378 xmlFreeDoc(ctxt->myDoc);
14379 ctxt->myDoc = NULL;
14380 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014381 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014382 ctxt->sax = NULL;
14383 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014384
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014385 return(ret);
14386}
14387
14388/**
Owen Taylor3473f882001-02-23 17:55:21 +000014389 * xmlSAXParseMemory:
14390 * @sax: the SAX handler block
14391 * @buffer: an pointer to a char array
14392 * @size: the size of the array
14393 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14394 * documents
14395 *
14396 * parse an XML in-memory block and use the given SAX function block
14397 * to handle the parsing callback. If sax is NULL, fallback to the default
14398 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014399 *
Owen Taylor3473f882001-02-23 17:55:21 +000014400 * Returns the resulting document tree
14401 */
14402xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014403xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14404 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014405 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014406}
14407
14408/**
14409 * xmlParseMemory:
14410 * @buffer: an pointer to a char array
14411 * @size: the size of the array
14412 *
14413 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014414 *
Owen Taylor3473f882001-02-23 17:55:21 +000014415 * Returns the resulting document tree
14416 */
14417
Daniel Veillard50822cb2001-07-26 20:05:51 +000014418xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014419 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14420}
14421
14422/**
14423 * xmlRecoverMemory:
14424 * @buffer: an pointer to a char array
14425 * @size: the size of the array
14426 *
14427 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014428 * In the case the document is not Well Formed, an attempt to
14429 * build a tree is tried anyway
14430 *
14431 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014432 */
14433
Daniel Veillard50822cb2001-07-26 20:05:51 +000014434xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014435 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14436}
14437
14438/**
14439 * xmlSAXUserParseMemory:
14440 * @sax: a SAX handler
14441 * @user_data: The user data returned on SAX callbacks
14442 * @buffer: an in-memory XML document input
14443 * @size: the length of the XML document in bytes
14444 *
14445 * A better SAX parsing routine.
14446 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014447 *
Owen Taylor3473f882001-02-23 17:55:21 +000014448 * Returns 0 in case of success or a error number otherwise
14449 */
14450int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014451 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014452 int ret = 0;
14453 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014454
14455 xmlInitParser();
14456
Owen Taylor3473f882001-02-23 17:55:21 +000014457 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14458 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014459 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14460 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014461 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014462 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014463
Daniel Veillard30211a02001-04-26 09:33:18 +000014464 if (user_data != NULL)
14465 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014466
Owen Taylor3473f882001-02-23 17:55:21 +000014467 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014468
Owen Taylor3473f882001-02-23 17:55:21 +000014469 if (ctxt->wellFormed)
14470 ret = 0;
14471 else {
14472 if (ctxt->errNo != 0)
14473 ret = ctxt->errNo;
14474 else
14475 ret = -1;
14476 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014477 if (sax != NULL)
14478 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014479 if (ctxt->myDoc != NULL) {
14480 xmlFreeDoc(ctxt->myDoc);
14481 ctxt->myDoc = NULL;
14482 }
Owen Taylor3473f882001-02-23 17:55:21 +000014483 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014484
Owen Taylor3473f882001-02-23 17:55:21 +000014485 return ret;
14486}
Daniel Veillard81273902003-09-30 00:43:48 +000014487#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014488
14489/**
14490 * xmlCreateDocParserCtxt:
14491 * @cur: a pointer to an array of xmlChar
14492 *
14493 * Creates a parser context for an XML in-memory document.
14494 *
14495 * Returns the new parser context or NULL
14496 */
14497xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014498xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014499 int len;
14500
14501 if (cur == NULL)
14502 return(NULL);
14503 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014504 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014505}
14506
Daniel Veillard81273902003-09-30 00:43:48 +000014507#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014508/**
14509 * xmlSAXParseDoc:
14510 * @sax: the SAX handler block
14511 * @cur: a pointer to an array of xmlChar
14512 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14513 * documents
14514 *
14515 * parse an XML in-memory document and build a tree.
14516 * It use the given SAX function block to handle the parsing callback.
14517 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014518 *
Owen Taylor3473f882001-02-23 17:55:21 +000014519 * Returns the resulting document tree
14520 */
14521
14522xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014523xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014524 xmlDocPtr ret;
14525 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014526 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014527
Daniel Veillard38936062004-11-04 17:45:11 +000014528 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014529
14530
14531 ctxt = xmlCreateDocParserCtxt(cur);
14532 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014533 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014534 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014535 ctxt->sax = sax;
14536 ctxt->userData = NULL;
14537 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014538 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014539
14540 xmlParseDocument(ctxt);
14541 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14542 else {
14543 ret = NULL;
14544 xmlFreeDoc(ctxt->myDoc);
14545 ctxt->myDoc = NULL;
14546 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014547 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014548 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014549 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014550
Owen Taylor3473f882001-02-23 17:55:21 +000014551 return(ret);
14552}
14553
14554/**
14555 * xmlParseDoc:
14556 * @cur: a pointer to an array of xmlChar
14557 *
14558 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014559 *
Owen Taylor3473f882001-02-23 17:55:21 +000014560 * Returns the resulting document tree
14561 */
14562
14563xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014564xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014565 return(xmlSAXParseDoc(NULL, cur, 0));
14566}
Daniel Veillard81273902003-09-30 00:43:48 +000014567#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014568
Daniel Veillard81273902003-09-30 00:43:48 +000014569#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014570/************************************************************************
14571 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014572 * Specific function to keep track of entities references *
14573 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014574 * *
14575 ************************************************************************/
14576
14577static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14578
14579/**
14580 * xmlAddEntityReference:
14581 * @ent : A valid entity
14582 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014583 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014584 *
14585 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14586 */
14587static void
14588xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14589 xmlNodePtr lastNode)
14590{
14591 if (xmlEntityRefFunc != NULL) {
14592 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14593 }
14594}
14595
14596
14597/**
14598 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014599 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014600 *
14601 * Set the function to call call back when a xml reference has been made
14602 */
14603void
14604xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14605{
14606 xmlEntityRefFunc = func;
14607}
Daniel Veillard81273902003-09-30 00:43:48 +000014608#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014609
14610/************************************************************************
14611 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014612 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014613 * *
14614 ************************************************************************/
14615
14616#ifdef LIBXML_XPATH_ENABLED
14617#include <libxml/xpath.h>
14618#endif
14619
Daniel Veillardffa3c742005-07-21 13:24:09 +000014620extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014621static int xmlParserInitialized = 0;
14622
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * The initialization sequence runs only while xmlParserInitialized is 0;
 * once it has completed the flag is set to 1 and later calls return
 * immediately.
 */

void
xmlInitParser(void) {
    /* Already initialized: nothing to do. */
    if (xmlParserInitialized != 0)
	return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support, take the global init mutex and test the flag
     * again, so the sequence below is skipped if the flag was set between
     * the unlocked test above and acquiring the lock.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
	xmlInitThreads();
	xmlInitGlobals();
	/* Install the default generic error handler if none (or only the
	 * default one) is currently set. */
	if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
	    (xmlGenericError == NULL))
	    initGenericErrorDefaultFunc(NULL);
	xmlInitMemory();
        xmlInitializeDict();
	xmlInitCharEncodingHandlers();
	xmlDefaultSAXHandlerInit();
	xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
	xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
	htmlInitAutoClose();
	htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
	xmlXPathInit();
#endif
	/* Mark the library as initialized only after every step has run. */
	xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14666
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 *
 * The function is a no-op when the library is not currently marked as
 * initialized; on completion the initialized flag is cleared so that
 * xmlInitParser() will run its sequence again.
 */

void
xmlCleanupParser(void) {
    /* Nothing to release if xmlInitParser() has not run (or cleanup
     * already happened). */
    if (!xmlParserInitialized)
	return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlResetLastError();
    /* NOTE(review): the comment below says "must be last", yet
     * xmlCleanupMemory() is called after it - confirm the intended order. */
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    xmlParserInitialized = 0;
}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014713
14714/************************************************************************
14715 * *
14716 * New set (2.6.0) of simpler and more flexible APIs *
14717 * *
14718 ************************************************************************/
14719
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe inside unbraced if/else); invoke it with a
 * trailing semicolon. @str is evaluated more than once, so it must not
 * have side effects.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14731
14732/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014733 * xmlCtxtReset:
14734 * @ctxt: an XML parser context
14735 *
14736 * Reset a parser context
14737 */
14738void
14739xmlCtxtReset(xmlParserCtxtPtr ctxt)
14740{
14741 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014742 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014743
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014744 if (ctxt == NULL)
14745 return;
14746
14747 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014748
14749 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14750 xmlFreeInputStream(input);
14751 }
14752 ctxt->inputNr = 0;
14753 ctxt->input = NULL;
14754
14755 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014756 if (ctxt->spaceTab != NULL) {
14757 ctxt->spaceTab[0] = -1;
14758 ctxt->space = &ctxt->spaceTab[0];
14759 } else {
14760 ctxt->space = NULL;
14761 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014762
14763
14764 ctxt->nodeNr = 0;
14765 ctxt->node = NULL;
14766
14767 ctxt->nameNr = 0;
14768 ctxt->name = NULL;
14769
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014770 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014771 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014772 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014773 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014774 DICT_FREE(ctxt->directory);
14775 ctxt->directory = NULL;
14776 DICT_FREE(ctxt->extSubURI);
14777 ctxt->extSubURI = NULL;
14778 DICT_FREE(ctxt->extSubSystem);
14779 ctxt->extSubSystem = NULL;
14780 if (ctxt->myDoc != NULL)
14781 xmlFreeDoc(ctxt->myDoc);
14782 ctxt->myDoc = NULL;
14783
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014784 ctxt->standalone = -1;
14785 ctxt->hasExternalSubset = 0;
14786 ctxt->hasPErefs = 0;
14787 ctxt->html = 0;
14788 ctxt->external = 0;
14789 ctxt->instate = XML_PARSER_START;
14790 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014791
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014792 ctxt->wellFormed = 1;
14793 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014794 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014795 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014796#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014797 ctxt->vctxt.userData = ctxt;
14798 ctxt->vctxt.error = xmlParserValidityError;
14799 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014800#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014801 ctxt->record_info = 0;
14802 ctxt->nbChars = 0;
14803 ctxt->checkIndex = 0;
14804 ctxt->inSubset = 0;
14805 ctxt->errNo = XML_ERR_OK;
14806 ctxt->depth = 0;
14807 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14808 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014809 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014810 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014811 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014812 xmlInitNodeInfoSeq(&ctxt->node_seq);
14813
14814 if (ctxt->attsDefault != NULL) {
14815 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14816 ctxt->attsDefault = NULL;
14817 }
14818 if (ctxt->attsSpecial != NULL) {
14819 xmlHashFree(ctxt->attsSpecial, NULL);
14820 ctxt->attsSpecial = NULL;
14821 }
14822
Daniel Veillard4432df22003-09-28 18:58:27 +000014823#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014824 if (ctxt->catalogs != NULL)
14825 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014826#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014827 if (ctxt->lastError.code != XML_ERR_OK)
14828 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014829}
14830
14831/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014832 * xmlCtxtResetPush:
14833 * @ctxt: an XML parser context
14834 * @chunk: a pointer to an array of chars
14835 * @size: number of chars in the array
14836 * @filename: an optional file name or URI
14837 * @encoding: the document encoding, or NULL
14838 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014839 * Reset a push parser context
14840 *
14841 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014842 */
14843int
14844xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14845 int size, const char *filename, const char *encoding)
14846{
14847 xmlParserInputPtr inputStream;
14848 xmlParserInputBufferPtr buf;
14849 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14850
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014851 if (ctxt == NULL)
14852 return(1);
14853
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014854 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14855 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14856
14857 buf = xmlAllocParserInputBuffer(enc);
14858 if (buf == NULL)
14859 return(1);
14860
14861 if (ctxt == NULL) {
14862 xmlFreeParserInputBuffer(buf);
14863 return(1);
14864 }
14865
14866 xmlCtxtReset(ctxt);
14867
14868 if (ctxt->pushTab == NULL) {
14869 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14870 sizeof(xmlChar *));
14871 if (ctxt->pushTab == NULL) {
14872 xmlErrMemory(ctxt, NULL);
14873 xmlFreeParserInputBuffer(buf);
14874 return(1);
14875 }
14876 }
14877
14878 if (filename == NULL) {
14879 ctxt->directory = NULL;
14880 } else {
14881 ctxt->directory = xmlParserGetDirectory(filename);
14882 }
14883
14884 inputStream = xmlNewInputStream(ctxt);
14885 if (inputStream == NULL) {
14886 xmlFreeParserInputBuffer(buf);
14887 return(1);
14888 }
14889
14890 if (filename == NULL)
14891 inputStream->filename = NULL;
14892 else
14893 inputStream->filename = (char *)
14894 xmlCanonicPath((const xmlChar *) filename);
14895 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014896 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014897
14898 inputPush(ctxt, inputStream);
14899
14900 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14901 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014902 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14903 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014904
14905 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14906
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014907 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014908#ifdef DEBUG_PUSH
14909 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14910#endif
14911 }
14912
14913 if (encoding != NULL) {
14914 xmlCharEncodingHandlerPtr hdlr;
14915
Daniel Veillard37334572008-07-31 08:20:02 +000014916 if (ctxt->encoding != NULL)
14917 xmlFree((xmlChar *) ctxt->encoding);
14918 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14919
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014920 hdlr = xmlFindCharEncodingHandler(encoding);
14921 if (hdlr != NULL) {
14922 xmlSwitchToEncoding(ctxt, hdlr);
14923 } else {
14924 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14925 "Unsupported encoding %s\n", BAD_CAST encoding);
14926 }
14927 } else if (enc != XML_CHAR_ENCODING_NONE) {
14928 xmlSwitchEncoding(ctxt, enc);
14929 }
14930
14931 return(0);
14932}
14933
Daniel Veillard37334572008-07-31 08:20:02 +000014934
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014935/**
Daniel Veillard37334572008-07-31 08:20:02 +000014936 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014937 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014938 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014939 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014940 *
14941 * Applies the options to the parser context
14942 *
14943 * Returns 0 in case of success, the set of unknown or unimplemented options
14944 * in case of error.
14945 */
Daniel Veillard37334572008-07-31 08:20:02 +000014946static int
14947xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014948{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014949 if (ctxt == NULL)
14950 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014951 if (encoding != NULL) {
14952 if (ctxt->encoding != NULL)
14953 xmlFree((xmlChar *) ctxt->encoding);
14954 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14955 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014956 if (options & XML_PARSE_RECOVER) {
14957 ctxt->recovery = 1;
14958 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014959 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014960 } else
14961 ctxt->recovery = 0;
14962 if (options & XML_PARSE_DTDLOAD) {
14963 ctxt->loadsubset = XML_DETECT_IDS;
14964 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014965 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014966 } else
14967 ctxt->loadsubset = 0;
14968 if (options & XML_PARSE_DTDATTR) {
14969 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14970 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014971 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014972 }
14973 if (options & XML_PARSE_NOENT) {
14974 ctxt->replaceEntities = 1;
14975 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14976 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014977 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014978 } else
14979 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014980 if (options & XML_PARSE_PEDANTIC) {
14981 ctxt->pedantic = 1;
14982 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014983 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014984 } else
14985 ctxt->pedantic = 0;
14986 if (options & XML_PARSE_NOBLANKS) {
14987 ctxt->keepBlanks = 0;
14988 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14989 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014990 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014991 } else
14992 ctxt->keepBlanks = 1;
14993 if (options & XML_PARSE_DTDVALID) {
14994 ctxt->validate = 1;
14995 if (options & XML_PARSE_NOWARNING)
14996 ctxt->vctxt.warning = NULL;
14997 if (options & XML_PARSE_NOERROR)
14998 ctxt->vctxt.error = NULL;
14999 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015000 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015001 } else
15002 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015003 if (options & XML_PARSE_NOWARNING) {
15004 ctxt->sax->warning = NULL;
15005 options -= XML_PARSE_NOWARNING;
15006 }
15007 if (options & XML_PARSE_NOERROR) {
15008 ctxt->sax->error = NULL;
15009 ctxt->sax->fatalError = NULL;
15010 options -= XML_PARSE_NOERROR;
15011 }
Daniel Veillard81273902003-09-30 00:43:48 +000015012#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015013 if (options & XML_PARSE_SAX1) {
15014 ctxt->sax->startElement = xmlSAX2StartElement;
15015 ctxt->sax->endElement = xmlSAX2EndElement;
15016 ctxt->sax->startElementNs = NULL;
15017 ctxt->sax->endElementNs = NULL;
15018 ctxt->sax->initialized = 1;
15019 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015020 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015021 }
Daniel Veillard81273902003-09-30 00:43:48 +000015022#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015023 if (options & XML_PARSE_NODICT) {
15024 ctxt->dictNames = 0;
15025 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015026 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015027 } else {
15028 ctxt->dictNames = 1;
15029 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015030 if (options & XML_PARSE_NOCDATA) {
15031 ctxt->sax->cdataBlock = NULL;
15032 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015033 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015034 }
15035 if (options & XML_PARSE_NSCLEAN) {
15036 ctxt->options |= XML_PARSE_NSCLEAN;
15037 options -= XML_PARSE_NSCLEAN;
15038 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015039 if (options & XML_PARSE_NONET) {
15040 ctxt->options |= XML_PARSE_NONET;
15041 options -= XML_PARSE_NONET;
15042 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015043 if (options & XML_PARSE_COMPACT) {
15044 ctxt->options |= XML_PARSE_COMPACT;
15045 options -= XML_PARSE_COMPACT;
15046 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015047 if (options & XML_PARSE_OLD10) {
15048 ctxt->options |= XML_PARSE_OLD10;
15049 options -= XML_PARSE_OLD10;
15050 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015051 if (options & XML_PARSE_NOBASEFIX) {
15052 ctxt->options |= XML_PARSE_NOBASEFIX;
15053 options -= XML_PARSE_NOBASEFIX;
15054 }
15055 if (options & XML_PARSE_HUGE) {
15056 ctxt->options |= XML_PARSE_HUGE;
15057 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015058 if (ctxt->dict != NULL)
15059 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015060 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015061 if (options & XML_PARSE_OLDSAX) {
15062 ctxt->options |= XML_PARSE_OLDSAX;
15063 options -= XML_PARSE_OLDSAX;
15064 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015065 if (options & XML_PARSE_IGNORE_ENC) {
15066 ctxt->options |= XML_PARSE_IGNORE_ENC;
15067 options -= XML_PARSE_IGNORE_ENC;
15068 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015069 if (options & XML_PARSE_BIG_LINES) {
15070 ctxt->options |= XML_PARSE_BIG_LINES;
15071 options -= XML_PARSE_BIG_LINES;
15072 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015073 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015074 return (options);
15075}
15076
15077/**
Daniel Veillard37334572008-07-31 08:20:02 +000015078 * xmlCtxtUseOptions:
15079 * @ctxt: an XML parser context
15080 * @options: a combination of xmlParserOption
15081 *
15082 * Applies the options to the parser context
15083 *
15084 * Returns 0 in case of success, the set of unknown or unimplemented options
15085 * in case of error.
15086 */
15087int
15088xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15089{
15090 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15091}
15092
15093/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015094 * xmlDoRead:
15095 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015096 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015097 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015098 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015099 * @reuse: keep the context for reuse
15100 *
15101 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015102 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015103 * Returns the resulting document tree or NULL
15104 */
15105static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015106xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15107 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015108{
15109 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015110
15111 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015112 if (encoding != NULL) {
15113 xmlCharEncodingHandlerPtr hdlr;
15114
15115 hdlr = xmlFindCharEncodingHandler(encoding);
15116 if (hdlr != NULL)
15117 xmlSwitchToEncoding(ctxt, hdlr);
15118 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015119 if ((URL != NULL) && (ctxt->input != NULL) &&
15120 (ctxt->input->filename == NULL))
15121 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015122 xmlParseDocument(ctxt);
15123 if ((ctxt->wellFormed) || ctxt->recovery)
15124 ret = ctxt->myDoc;
15125 else {
15126 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015127 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015128 xmlFreeDoc(ctxt->myDoc);
15129 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015130 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015131 ctxt->myDoc = NULL;
15132 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015133 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015134 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015135
15136 return (ret);
15137}
15138
15139/**
15140 * xmlReadDoc:
15141 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015142 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015143 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015144 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015145 *
15146 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015147 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015148 * Returns the resulting document tree
15149 */
15150xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015151xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015152{
15153 xmlParserCtxtPtr ctxt;
15154
15155 if (cur == NULL)
15156 return (NULL);
15157
15158 ctxt = xmlCreateDocParserCtxt(cur);
15159 if (ctxt == NULL)
15160 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015161 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015162}
15163
15164/**
15165 * xmlReadFile:
15166 * @filename: a file or URL
15167 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015168 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015169 *
15170 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015171 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015172 * Returns the resulting document tree
15173 */
15174xmlDocPtr
15175xmlReadFile(const char *filename, const char *encoding, int options)
15176{
15177 xmlParserCtxtPtr ctxt;
15178
Daniel Veillard61b93382003-11-03 14:28:31 +000015179 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015180 if (ctxt == NULL)
15181 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015182 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015183}
15184
15185/**
15186 * xmlReadMemory:
15187 * @buffer: a pointer to a char array
15188 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015189 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015190 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015191 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015192 *
15193 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015194 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015195 * Returns the resulting document tree
15196 */
15197xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015198xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015199{
15200 xmlParserCtxtPtr ctxt;
15201
15202 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15203 if (ctxt == NULL)
15204 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015205 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015206}
15207
15208/**
15209 * xmlReadFd:
15210 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015211 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015212 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015213 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015214 *
15215 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015216 * NOTE that the file descriptor will not be closed when the
15217 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015218 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015219 * Returns the resulting document tree
15220 */
15221xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015222xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015223{
15224 xmlParserCtxtPtr ctxt;
15225 xmlParserInputBufferPtr input;
15226 xmlParserInputPtr stream;
15227
15228 if (fd < 0)
15229 return (NULL);
15230
15231 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15232 if (input == NULL)
15233 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015234 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015235 ctxt = xmlNewParserCtxt();
15236 if (ctxt == NULL) {
15237 xmlFreeParserInputBuffer(input);
15238 return (NULL);
15239 }
15240 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15241 if (stream == NULL) {
15242 xmlFreeParserInputBuffer(input);
15243 xmlFreeParserCtxt(ctxt);
15244 return (NULL);
15245 }
15246 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015247 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015248}
15249
15250/**
15251 * xmlReadIO:
15252 * @ioread: an I/O read function
15253 * @ioclose: an I/O close function
15254 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015255 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015256 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015257 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015258 *
15259 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015260 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015261 * Returns the resulting document tree
15262 */
15263xmlDocPtr
15264xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015265 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266{
15267 xmlParserCtxtPtr ctxt;
15268 xmlParserInputBufferPtr input;
15269 xmlParserInputPtr stream;
15270
15271 if (ioread == NULL)
15272 return (NULL);
15273
15274 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15275 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015276 if (input == NULL) {
15277 if (ioclose != NULL)
15278 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015279 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015280 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015281 ctxt = xmlNewParserCtxt();
15282 if (ctxt == NULL) {
15283 xmlFreeParserInputBuffer(input);
15284 return (NULL);
15285 }
15286 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287 if (stream == NULL) {
15288 xmlFreeParserInputBuffer(input);
15289 xmlFreeParserCtxt(ctxt);
15290 return (NULL);
15291 }
15292 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015293 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015294}
15295
15296/**
15297 * xmlCtxtReadDoc:
15298 * @ctxt: an XML parser context
15299 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015300 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015301 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015302 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015303 *
15304 * parse an XML in-memory document and build a tree.
15305 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015306 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015307 * Returns the resulting document tree
15308 */
15309xmlDocPtr
15310xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015311 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015312{
15313 xmlParserInputPtr stream;
15314
15315 if (cur == NULL)
15316 return (NULL);
15317 if (ctxt == NULL)
15318 return (NULL);
15319
15320 xmlCtxtReset(ctxt);
15321
15322 stream = xmlNewStringInputStream(ctxt, cur);
15323 if (stream == NULL) {
15324 return (NULL);
15325 }
15326 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015327 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015328}
15329
15330/**
15331 * xmlCtxtReadFile:
15332 * @ctxt: an XML parser context
15333 * @filename: a file or URL
15334 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015335 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015336 *
15337 * parse an XML file from the filesystem or the network.
15338 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015339 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015340 * Returns the resulting document tree
15341 */
15342xmlDocPtr
15343xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15344 const char *encoding, int options)
15345{
15346 xmlParserInputPtr stream;
15347
15348 if (filename == NULL)
15349 return (NULL);
15350 if (ctxt == NULL)
15351 return (NULL);
15352
15353 xmlCtxtReset(ctxt);
15354
Daniel Veillard29614c72004-11-26 10:47:26 +000015355 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015356 if (stream == NULL) {
15357 return (NULL);
15358 }
15359 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015360 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015361}
15362
15363/**
15364 * xmlCtxtReadMemory:
15365 * @ctxt: an XML parser context
15366 * @buffer: a pointer to a char array
15367 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015368 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015369 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015370 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015371 *
15372 * parse an XML in-memory document and build a tree.
15373 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015374 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015375 * Returns the resulting document tree
15376 */
15377xmlDocPtr
15378xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015379 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015380{
15381 xmlParserInputBufferPtr input;
15382 xmlParserInputPtr stream;
15383
15384 if (ctxt == NULL)
15385 return (NULL);
15386 if (buffer == NULL)
15387 return (NULL);
15388
15389 xmlCtxtReset(ctxt);
15390
15391 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15392 if (input == NULL) {
15393 return(NULL);
15394 }
15395
15396 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15397 if (stream == NULL) {
15398 xmlFreeParserInputBuffer(input);
15399 return(NULL);
15400 }
15401
15402 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015403 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015404}
15405
15406/**
15407 * xmlCtxtReadFd:
15408 * @ctxt: an XML parser context
15409 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015410 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015411 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015412 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015413 *
15414 * parse an XML from a file descriptor and build a tree.
15415 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015416 * NOTE that the file descriptor will not be closed when the
15417 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015418 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015419 * Returns the resulting document tree
15420 */
15421xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015422xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15423 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015424{
15425 xmlParserInputBufferPtr input;
15426 xmlParserInputPtr stream;
15427
15428 if (fd < 0)
15429 return (NULL);
15430 if (ctxt == NULL)
15431 return (NULL);
15432
15433 xmlCtxtReset(ctxt);
15434
15435
15436 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15437 if (input == NULL)
15438 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015439 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015440 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15441 if (stream == NULL) {
15442 xmlFreeParserInputBuffer(input);
15443 return (NULL);
15444 }
15445 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015446 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015447}
15448
15449/**
15450 * xmlCtxtReadIO:
15451 * @ctxt: an XML parser context
15452 * @ioread: an I/O read function
15453 * @ioclose: an I/O close function
15454 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015455 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015456 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015457 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015458 *
15459 * parse an XML document from I/O functions and source and build a tree.
15460 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015461 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015462 * Returns the resulting document tree
15463 */
15464xmlDocPtr
15465xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15466 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015467 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015468 const char *encoding, int options)
15469{
15470 xmlParserInputBufferPtr input;
15471 xmlParserInputPtr stream;
15472
15473 if (ioread == NULL)
15474 return (NULL);
15475 if (ctxt == NULL)
15476 return (NULL);
15477
15478 xmlCtxtReset(ctxt);
15479
15480 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15481 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015482 if (input == NULL) {
15483 if (ioclose != NULL)
15484 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015485 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015486 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015487 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15488 if (stream == NULL) {
15489 xmlFreeParserInputBuffer(input);
15490 return (NULL);
15491 }
15492 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015493 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015494}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015495
15496#define bottom_parser
15497#include "elfgcchack.h"