blob: 1d478c3d47a387a87d34f7c7e90e231624c8af4c [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
/* Size thresholds applied to entities by the safety checks below. */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800125 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
135 return(0);
136
137 /*
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
141 */
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
145 }
146 consumed += ctxt->sizeentities;
147
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
149 return(0);
150 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000151 /*
152 * Do the check based on the replacement size of the entity
153 */
154 if (size < XML_PARSER_BIG_ENTITY)
155 return(0);
156
157 /*
158 * A limit on the amount of text data reasonably used
159 */
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
163 }
164 consumed += ctxt->sizeentities;
165
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168 return (0);
169 } else if (ent != NULL) {
170 /*
171 * use the number of parsed entities in the replacement
172 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800173 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000174
175 /*
176 * The amount of data parsed counting entities size only once
177 */
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
181 }
182 consumed += ctxt->sizeentities;
183
184 /*
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
187 */
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189 return (0);
190 } else {
191 /*
192 * strange we got no data for checking just return
193 */
194 return (0);
195 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 return (1);
198}
199
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we accept to
 * process. This is a safety boundary rather than a limitation of the
 * parser itself; it can be disabled with the XML_PARSE_HUGE parser
 * option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard0fb18932003-09-07 09:14:37 +0000210
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
/* Flag macro: defined to 1. */
#define SAX2 1

/* Sizes of the large and regular parser buffers. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* String constant naming the "SAX compatibility mode" document. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
216
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * The minimal amount of data the parser expects to have received when
 * calling GROW. This is not a hard limit but an optimization when
 * reading strings like Names. It is not strictly needed as long as the
 * input's available characters are followed by 0, which should be
 * provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
227
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL    /* end-of-list sentinel */
};
237
Daniel Veillarda07050d2003-10-19 14:46:32 +0000238
Owen Taylor3473f882001-02-23 17:55:21 +0000239/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200240static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000242
Daniel Veillard7d515752003-09-26 19:12:37 +0000243static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000244xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000246 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000247 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000248
Daniel Veillard37334572008-07-31 08:20:02 +0000249static int
250xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000252#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000253static void
254xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000256#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000257
Daniel Veillard7d515752003-09-26 19:12:37 +0000258static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000259xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000261
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000262static int
263xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264
Daniel Veillarde57ec792003-09-10 10:50:59 +0000265/************************************************************************
266 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800267 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 * *
269 ************************************************************************/
270
271/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
276 *
277 * Handle a redefinition of attribute error
278 */
279static void
280xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
282{
Daniel Veillard157fee02003-10-31 10:36:03 +0000283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
285 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000286 if (ctxt != NULL)
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000289 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000294 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000300 if (ctxt != NULL) {
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
304 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305}
306
307/**
308 * xmlFatalErr:
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @extra: extra information string
312 *
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314 */
315static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317{
318 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800319 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320
Daniel Veillard157fee02003-10-31 10:36:03 +0000321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
323 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 switch (error) {
325 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800326 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800329 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800332 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "internal error";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800338 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800341 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800344 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800350 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800353 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800356 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "Fragment not allowed";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
441 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000492 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800495 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 if (info == NULL)
507 snprintf(errstr, 128, "%s\n", errmsg);
508 else
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000510 if (ctxt != NULL)
511 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000514 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL) {
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
519 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520}
521
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000522/**
523 * xmlFatalErrMsg:
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
527 *
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529 */
530static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000533{
Daniel Veillard157fee02003-10-31 10:36:03 +0000534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
536 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000537 if (ctxt != NULL)
538 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000546}
547
548/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000549 * xmlWarningMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 * @str2: extra data
555 *
556 * Handle a warning.
557 */
558static void
559xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000563
Daniel Veillard157fee02003-10-31 10:36:03 +0000564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000569 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200570 if (ctxt != NULL) {
571 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000572 (ctxt->sax) ? ctxt->sax->warning : NULL,
573 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200578 } else {
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
584 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585}
586
587/**
588 * xmlValidityError:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
592 * @str1: extra data
593 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000594 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595 */
596static void
597xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000598 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000599{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000600 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000601
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->errNo = error;
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
609 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200610 if (ctxt != NULL) {
611 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000612 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200618 } else {
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000624 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625}
626
627/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000628 * xmlFatalErrMsgInt:
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
633 *
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635 */
636static void
637xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000638 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000639{
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if (ctxt != NULL)
644 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000645 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000648 if (ctxt != NULL) {
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
652 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000653}
654
655/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: an string info
661 * @val: an integer value
662 * @str2: an string info
663 *
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 */
666static void
667xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800668 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000669 const xmlChar *str2)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000680 if (ctxt != NULL) {
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
684 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000685}
686
687/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000688 * xmlFatalErrMsgStr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
693 *
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 */
696static void
697xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000698 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000699{
Daniel Veillard157fee02003-10-31 10:36:03 +0000700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
702 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
713 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000714}
715
716/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000717 * xmlErrMsgStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
722 *
723 * Handle a non fatal parser error
724 */
725static void
726xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
728{
Daniel Veillard157fee02003-10-31 10:36:03 +0000729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
731 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000732 if (ctxt != NULL)
733 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737 val);
738}
739
740/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000741 * xmlNsErr:
742 * @ctxt: an XML parser context
743 * @error: the error number
744 * @msg: the message
745 * @info1: extra information string
746 * @info2: extra information string
747 *
748 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749 */
750static void
751xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000755{
Daniel Veillard157fee02003-10-31 10:36:03 +0000756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
758 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000759 if (ctxt != NULL)
760 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000765 if (ctxt != NULL)
766 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000767}
768
Daniel Veillard37334572008-07-31 08:20:02 +0000769/**
770 * xmlNsWarn
771 * @ctxt: an XML parser context
772 * @error: the error number
773 * @msg: the message
774 * @info1: extra information string
775 * @info2: extra information string
776 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800777 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000778 */
779static void
780xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const char *msg,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
784{
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
787 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
792}
793
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000794/************************************************************************
795 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800796 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797 * *
798 ************************************************************************/
799
800/**
801 * xmlHasFeature:
802 * @feature: the feature to be examined
803 *
804 * Examines if the library has been compiled with a given feature.
805 *
806 * Returns a non-zero value if the feature exist, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an unknown
808 * unknown feature is requested, non-zero otherwise.
809 */
810int
811xmlHasFeature(xmlFeature feature)
812{
813 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_THREAD_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_TREE_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_OUTPUT_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_PUSH_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_READER_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_PATTERN_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_WRITER_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_SAX1_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_FTP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_HTTP_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_VALID_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_HTML_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_LEGACY_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_C14N_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_CATALOG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_XPATH_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_XPTR_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_XINCLUDE_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_ICONV_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_ISO8859X_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_UNICODE_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_REGEXP_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_AUTOMATA_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_EXPR_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_SCHEMAS_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_SCHEMATRON_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_MODULES_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_DEBUG_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef DEBUG_MEMORY_LOCATION
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_DEBUG_RUNTIME
990 return(1);
991#else
992 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000993#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000994 case XML_WITH_ZLIB:
995#ifdef LIBXML_ZLIB_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001000 case XML_WITH_LZMA:
1001#ifdef LIBXML_LZMA_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 case XML_WITH_ICU:
1007#ifdef LIBXML_ICU_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012 default:
1013 break;
1014 }
1015 return(0);
1016}
1017
1018/************************************************************************
1019 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001020 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 * *
1022 ************************************************************************/
1023
1024/**
1025 * xmlDetectSAX2:
1026 * @ctxt: an XML parser context
1027 *
1028 * Do the SAX2 detection and specific intialization
1029 */
1030static void
1031xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001037#else
1038 ctxt->sax2 = 1;
1039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001046 xmlErrMemory(ctxt, NULL);
1047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048}
1049
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050typedef struct _xmlDefAttrs xmlDefAttrs;
1051typedef xmlDefAttrs *xmlDefAttrsPtr;
1052struct _xmlDefAttrs {
1053 int nbAttrs; /* number of defaulted attributes on that element */
1054 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001055 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057
1058/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001059 * xmlAttrNormalizeSpace:
1060 * @src: the source string
1061 * @dst: the target string
1062 *
1063 * Normalize the space in non CDATA attribute values:
1064 * If the attribute type is not CDATA, then the XML processor MUST further
1065 * process the normalized attribute value by discarding any leading and
1066 * trailing space (#x20) characters, and by replacing sequences of space
1067 * (#x20) characters by a single space (#x20) character.
1068 * Note that the size of dst need to be at least src, and if one doesn't need
1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070 * passing src as dst is just fine.
1071 *
1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073 * is needed.
1074 */
1075static xmlChar *
1076xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077{
1078 if ((src == NULL) || (dst == NULL))
1079 return(NULL);
1080
1081 while (*src == 0x20) src++;
1082 while (*src != 0) {
1083 if (*src == 0x20) {
1084 while (*src == 0x20) src++;
1085 if (*src != 0)
1086 *dst++ = 0x20;
1087 } else {
1088 *dst++ = *src++;
1089 }
1090 }
1091 *dst = 0;
1092 if (dst == src)
1093 return(NULL);
1094 return(dst);
1095}
1096
1097/**
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1100 *
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1104 *
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106 * is needed.
1107 */
1108static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001109xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001110{
1111 int i;
1112 int remove_head = 0;
1113 int need_realloc = 0;
1114 const xmlChar *cur;
1115
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117 return(NULL);
1118 i = *len;
1119 if (i <= 0)
1120 return(NULL);
1121
1122 cur = src;
1123 while (*cur == 0x20) {
1124 cur++;
1125 remove_head++;
1126 }
1127 while (*cur != 0) {
1128 if (*cur == 0x20) {
1129 cur++;
1130 if ((*cur == 0x20) || (*cur == 0)) {
1131 need_realloc = 1;
1132 break;
1133 }
1134 } else
1135 cur++;
1136 }
1137 if (need_realloc) {
1138 xmlChar *ret;
1139
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141 if (ret == NULL) {
1142 xmlErrMemory(ctxt, NULL);
1143 return(NULL);
1144 }
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1147 return(ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 memmove(src, src + remove_head, 1 + *len);
1151 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001152 }
1153 return(NULL);
1154}
1155
1156/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 * xmlAddDefAttrs:
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1162 *
1163 * Add a defaulted attribute for an element
1164 */
1165static void
1166xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1171 int len;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1174
Daniel Veillard6a31b832008-03-26 14:06:44 +00001175 /*
1176 * Allows to detect attribute redefinitions
1177 */
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180 return;
1181 }
1182
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 if (ctxt->attsDefault == NULL)
1186 goto mem_error;
1187 }
1188
1189 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 */
1193 name = xmlSplitQName3(fullname, &len);
1194 if (name == NULL) {
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1196 prefix = NULL;
1197 } else {
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200 }
1201
1202 /*
1203 * make sure there is some storage
1204 */
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 if (defaults == NULL)
1210 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001212 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1215 xmlFree(defaults);
1216 goto mem_error;
1217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001219 xmlDefAttrsPtr temp;
1220
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001223 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1229 xmlFree(defaults);
1230 goto mem_error;
1231 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 }
1233
1234 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001235 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001236 * are within the DTD and hen not associated to namespace names.
1237 */
1238 name = xmlSplitQName3(fullattr, &len);
1239 if (name == NULL) {
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241 prefix = NULL;
1242 } else {
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245 }
1246
Daniel Veillardae0765b2008-07-31 19:54:59 +00001247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254 if (ctxt->external)
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256 else
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 defaults->nbAttrs++;
1259
1260 return;
1261
1262mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001263 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 return;
1265}
1266
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001267/**
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1273 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001275 */
1276static void
1277xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1280 int type)
1281{
1282 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001284 if (ctxt->attsSpecial == NULL)
1285 goto mem_error;
1286 }
1287
Daniel Veillardac4118d2008-01-11 05:27:32 +00001288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289 return;
1290
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001293 return;
1294
1295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001297 return;
1298}
1299
Daniel Veillard4432df22003-09-28 18:58:27 +00001300/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001301 * xmlCleanSpecialAttrCallback:
1302 *
1303 * Removes CDATA attributes from the special attribute table
1304 */
1305static void
1306xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313 }
1314}
1315
1316/**
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1319 *
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1323 */
1324static void
1325xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326{
1327 if (ctxt->attsSpecial == NULL)
1328 return;
1329
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1335 }
1336 return;
1337}
1338
1339/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1342 *
1343 * Checks that the value conforms to the LanguageID production:
1344 *
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1347 *
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1354 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001355 * The current REC reference the sucessors of RFC 1766, currently 5646
1356 *
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1359 * ["-" script]
1360 * ["-" region]
1361 * *("-" variant)
1362 * *("-" extension)
1363 * ["-" privateuse]
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1369 *
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1372 *
1373 * script = 4ALPHA ; ISO 15924 code
1374 *
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1377 *
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1380 *
1381 * extension = singleton 1*("-" (2*8alphanum))
1382 *
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1386 * / %x41-57 ; A - W
1387 * / %x59-5A ; Y - Z
1388 * / %x61-77 ; a - w
1389 * / %x79-7A ; y - z
1390 *
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1394 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001395 * Returns 1 if correct 0 otherwise
1396 **/
1397int
1398xmlCheckLanguageID(const xmlChar * lang)
1399{
Daniel Veillard60587d62010-11-04 15:16:27 +01001400 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001401
1402 if (cur == NULL)
1403 return (0);
1404 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001405 ((cur[0] == 'I') && (cur[1] == '-')) ||
1406 ((cur[0] == 'x') && (cur[1] == '-')) ||
1407 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001408 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001409 * Still allow IANA code and user code which were coming
1410 * from the previous version of the XML-1.0 specification
1411 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001412 */
1413 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001415 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001417 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001418 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001419 nxt = cur;
1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422 nxt++;
1423 if (nxt - cur >= 4) {
1424 /*
1425 * Reserved
1426 */
1427 if ((nxt - cur > 8) || (nxt[0] != 0))
1428 return(0);
1429 return(1);
1430 }
1431 if (nxt - cur < 2)
1432 return(0);
1433 /* we got an ISO 639 code */
1434 if (nxt[0] == 0)
1435 return(1);
1436 if (nxt[0] != '-')
1437 return(0);
1438
1439 nxt++;
1440 cur = nxt;
1441 /* now we can have extlang or script or region or variant */
1442 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443 goto region_m49;
1444
1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447 nxt++;
1448 if (nxt - cur == 4)
1449 goto script;
1450 if (nxt - cur == 2)
1451 goto region;
1452 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453 goto variant;
1454 if (nxt - cur != 3)
1455 return(0);
1456 /* we parsed an extlang */
1457 if (nxt[0] == 0)
1458 return(1);
1459 if (nxt[0] != '-')
1460 return(0);
1461
1462 nxt++;
1463 cur = nxt;
1464 /* now we can have script or region or variant */
1465 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466 goto region_m49;
1467
1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470 nxt++;
1471 if (nxt - cur == 2)
1472 goto region;
1473 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474 goto variant;
1475 if (nxt - cur != 4)
1476 return(0);
1477 /* we parsed a script */
1478script:
1479 if (nxt[0] == 0)
1480 return(1);
1481 if (nxt[0] != '-')
1482 return(0);
1483
1484 nxt++;
1485 cur = nxt;
1486 /* now we can have region or variant */
1487 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488 goto region_m49;
1489
1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492 nxt++;
1493
1494 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495 goto variant;
1496 if (nxt - cur != 2)
1497 return(0);
1498 /* we parsed a region */
1499region:
1500 if (nxt[0] == 0)
1501 return(1);
1502 if (nxt[0] != '-')
1503 return(0);
1504
1505 nxt++;
1506 cur = nxt;
1507 /* now we can just have a variant */
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511
1512 if ((nxt - cur < 5) || (nxt - cur > 8))
1513 return(0);
1514
1515 /* we parsed a variant */
1516variant:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001522 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001523
1524region_m49:
1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527 nxt += 3;
1528 goto region;
1529 }
1530 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001531}
1532
/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/
1538
Daniel Veillard8ed10722009-08-20 19:17:36 +02001539static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001541
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542#ifdef SAX2
1543/**
1544 * nsPush:
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1548 *
1549 * Pushes a new parser namespace on top of the ns stack
1550 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 */
1554static int
1555nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1558 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001560 if (ctxt->nsTab[i] == prefix) {
1561 /* in scope */
1562 if (ctxt->nsTab[i + 1] == URL)
1563 return(-2);
1564 /* out of scope keep it */
1565 break;
1566 }
1567 }
1568 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570 ctxt->nsMax = 10;
1571 ctxt->nsNr = 0;
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001576 ctxt->nsMax = 0;
1577 return (-1);
1578 }
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001580 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001586 ctxt->nsMax /= 2;
1587 return (-1);
1588 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001589 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 }
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1594}
1595/**
1596 * nsPop:
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1599 *
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1601 *
1602 * Returns the number of namespaces removed
1603 */
1604static int
1605nsPop(xmlParserCtxtPtr ctxt, int nr)
1606{
1607 int i;
1608
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612 nr = ctxt->nsNr;
1613 }
1614 if (ctxt->nsNr <= 0)
1615 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001616
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 for (i = 0;i < nr;i++) {
1618 ctxt->nsNr--;
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1620 }
1621 return(nr);
1622}
1623#endif
1624
1625static int
1626xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001628 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629 int maxatts;
1630
1631 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001645 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001646 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001651 ctxt->maxatts = maxatts;
1652 }
1653 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657}
1658
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659/**
1660 * inputPush:
1661 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001662 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 *
1664 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001666 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001667 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001668int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001671 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001672 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 if (ctxt->inputNr >= ctxt->inputMax) {
1674 ctxt->inputMax *= 2;
1675 ctxt->inputTab =
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677 ctxt->inputMax *
1678 sizeof(ctxt->inputTab[0]));
1679 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
1683 value = NULL;
1684 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001685 }
1686 }
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1690}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001693 * @ctxt: an XML parser context
1694 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001696 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001699xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700inputPop(xmlParserCtxtPtr ctxt)
1701{
1702 xmlParserInputPtr ret;
1703
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001704 if (ctxt == NULL)
1705 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001707 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 ctxt->inputNr--;
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711 else
1712 ctxt->input = NULL;
1713 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001714 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 return (ret);
1716}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001717/**
1718 * nodePush:
1719 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001720 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 *
1722 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001723 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001724 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001729 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001731 xmlNodePtr *tmp;
1732
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001736 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001738 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001739 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001740 ctxt->nodeTab = tmp;
1741 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001742 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001747 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001750 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001751 ctxt->nodeTab[ctxt->nodeNr] = value;
1752 ctxt->node = value;
1753 return (ctxt->nodeNr++);
1754}
Daniel Veillard8915c152008-08-26 13:05:34 +00001755
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756/**
1757 * nodePop:
1758 * @ctxt: an XML parser context
1759 *
1760 * Pops the top element node from the node stack
1761 *
1762 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001763 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001764xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765nodePop(xmlParserCtxtPtr ctxt)
1766{
1767 xmlNodePtr ret;
1768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001771 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 ctxt->nodeNr--;
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775 else
1776 ctxt->node = NULL;
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 return (ret);
1780}
Daniel Veillarda2351322004-06-27 12:08:10 +00001781
1782#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 * nameNsPush:
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1789 *
1790 * Pushes a new element name/prefix/URL on top of the name stack
1791 *
1792 * Returns -1 in case of error, the index in the stack otherwise
1793 */
1794static int
1795nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797{
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1800 void **tmp2;
1801 ctxt->nameMax *= 2;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803 ctxt->nameMax *
1804 sizeof(ctxt->nameTab[0]));
1805 if (tmp == NULL) {
1806 ctxt->nameMax /= 2;
1807 goto mem_error;
1808 }
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811 ctxt->nameMax * 3 *
1812 sizeof(ctxt->pushTab[0]));
1813 if (tmp2 == NULL) {
1814 ctxt->nameMax /= 2;
1815 goto mem_error;
1816 }
1817 ctxt->pushTab = tmp2;
1818 }
1819 ctxt->nameTab[ctxt->nameNr] = value;
1820 ctxt->name = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 return (ctxt->nameNr++);
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 return (-1);
1828}
1829/**
1830 * nameNsPop:
1831 * @ctxt: an XML parser context
1832 *
1833 * Pops the top element/prefix/URI name from the name stack
1834 *
1835 * Returns the name just removed
1836 */
1837static const xmlChar *
1838nameNsPop(xmlParserCtxtPtr ctxt)
1839{
1840 const xmlChar *ret;
1841
1842 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001843 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844 ctxt->nameNr--;
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847 else
1848 ctxt->name = NULL;
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1851 return (ret);
1852}
Daniel Veillarda2351322004-06-27 12:08:10 +00001853#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854
1855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001856 * namePush:
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1859 *
1860 * Pushes a new element name on top of the name stack
1861 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001865namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001867 if (ctxt == NULL) return (-1);
1868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001872 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001873 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001878 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001879 }
1880 ctxt->nameTab[ctxt->nameNr] = value;
1881 ctxt->name = value;
1882 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001884 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886}
1887/**
1888 * namePop:
1889 * @ctxt: an XML parser context
1890 *
1891 * Pops the top element name from the name stack
1892 *
1893 * Returns the name just removed
1894 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001895const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896namePop(xmlParserCtxtPtr ctxt)
1897{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001898 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001899
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 ctxt->nameNr--;
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905 else
1906 ctxt->name = NULL;
1907 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001908 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 return (ret);
1910}
Owen Taylor3473f882001-02-23 17:55:21 +00001911
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001912static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001914 int *tmp;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001921 ctxt->spaceMax /=2;
1922 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001924 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1929}
1930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001931static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 int ret;
1933 if (ctxt->spaceNr <= 0) return(0);
1934 ctxt->spaceNr--;
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001938 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1941 return(ret);
1942}
1943
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1978
/*
 * All four accessors expect a variable named ctxt to be in scope.
 * RAW and CUR both expand to the byte at the current input position.
 */
#define RAW             (*ctxt->input->cur)
#define CUR             (*ctxt->input->cur)
/* byte located val positions after the current one */
#define NXT(val)        ctxt->input->cur[(val)]
/* the current input position itself, usable as an lvalue */
#define CUR_PTR         ctxt->input->cur
1983
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * c1 ... cn. The bytes are compared as unsigned char and the comparison
 * stops at the first mismatch, so nothing is read past it.
 * Every argument is parenthesized so that expressions such as p + 1 can
 * be passed safely; s is evaluated several times and must therefore be
 * free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2001
/*
 * SKIP(val): advance the current input by val bytes, adding val to the
 * character count and to the column. Afterwards a '%' at the new position
 * is handed to xmlParserHandlePEReference(), and a 0 byte triggers an
 * attempt to grow the input, popping it when nothing more can be read.
 * val is evaluated three times.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
2009
/*
 * SKIPL(val): advance the current input by val bytes one at a time,
 * keeping line and column up to date when newlines are crossed and
 * counting every byte in nbChars. The trailing '%' and end of input
 * handling is the same as in SKIP.
 * val is parenthesized so that any expression can be passed; it is
 * evaluated on every loop iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
2024
Daniel Veillarda880b122003-04-21 21:36:41 +00002025#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002028 xmlSHRINK (ctxt);
2029
2030static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 }
Owen Taylor3473f882001-02-23 17:55:21 +00002036
Daniel Veillarda880b122003-04-21 21:36:41 +00002037#define GROW if ((ctxt->progressive == 0) && \
2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002039 xmlGROW (ctxt);
2040
2041static void xmlGROW (xmlParserCtxtPtr ctxt) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002042 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002044 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002045 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002047 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002048 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002049 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002050 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002051 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2052 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002053}
Owen Taylor3473f882001-02-23 17:55:21 +00002054
/* skip the blanks found at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character, see xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, bumping the column and the
 * character count, then try to grow the input if a 0 byte is reached.
 * The line counter is not touched.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line and column, then hand a '%' found at the new position
 * to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* current character of the context input; its byte length is stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* same as CUR_CHAR but reading from the string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v, whose encoded length is l,
 * to the buffer b at index i and advance i past it. Single byte
 * characters are stored directly, the others go through
 * xmlCopyCharMultiByte(). The expansion is a bare if/else statement.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002081
2082/**
2083 * xmlSkipBlankChars:
2084 * @ctxt: the XML parser context
2085 *
2086 * skip all blanks character found at that point in the input streams.
2087 * It pops up finished entities in the process if allowable at that point.
2088 *
2089 * Returns the number of space chars skipped
2090 */
2091
2092int
2093xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002094 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002095
2096 /*
2097 * It's Okay to use CUR/NEXT here since all the blanks are on
2098 * the ASCII range.
2099 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002100 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2101 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002102 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002103 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002104 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002105 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002106 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002107 if (*cur == '\n') {
2108 ctxt->input->line++; ctxt->input->col = 1;
2109 }
2110 cur++;
2111 res++;
2112 if (*cur == 0) {
2113 ctxt->input->cur = cur;
2114 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115 cur = ctxt->input->cur;
2116 }
2117 }
2118 ctxt->input->cur = cur;
2119 } else {
2120 int cur;
2121 do {
2122 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002123 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002124 NEXT;
2125 cur = CUR;
2126 res++;
2127 }
2128 while ((cur == 0) && (ctxt->inputNr > 1) &&
2129 (ctxt->instate != XML_PARSER_COMMENT)) {
2130 xmlPopInput(ctxt);
2131 cur = CUR;
2132 }
2133 /*
2134 * Need to handle support of entities branching here
2135 */
2136 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2138 }
Owen Taylor3473f882001-02-23 17:55:21 +00002139 return(res);
2140}
2141
/************************************************************************
 *                                                                      *
 *              Commodity functions to handle entities                  *
 *                                                                      *
 ************************************************************************/
2147
2148/**
2149 * xmlPopInput:
2150 * @ctxt: an XML parser context
2151 *
2152 * xmlPopInput: the current input pointed by ctxt->input came to an end
2153 * pop it and return the next char.
2154 *
2155 * Returns the current xmlChar in the parser context
2156 */
2157xmlChar
2158xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002159 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 if (xmlParserDebugEntities)
2161 xmlGenericError(xmlGenericErrorContext,
2162 "Popping input %d\n", ctxt->inputNr);
2163 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002164 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002165 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166 return(xmlPopInput(ctxt));
2167 return(CUR);
2168}
2169
2170/**
2171 * xmlPushInput:
2172 * @ctxt: an XML parser context
2173 * @input: an XML parser input fragment (entity, XML fragment ...).
2174 *
2175 * xmlPushInput: switch to a new input stream which is stacked on top
2176 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002177 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002178 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002179int
Owen Taylor3473f882001-02-23 17:55:21 +00002180xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002181 int ret;
2182 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002183
2184 if (xmlParserDebugEntities) {
2185 if ((ctxt->input != NULL) && (ctxt->input->filename))
2186 xmlGenericError(xmlGenericErrorContext,
2187 "%s(%d): ", ctxt->input->filename,
2188 ctxt->input->line);
2189 xmlGenericError(xmlGenericErrorContext,
2190 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2191 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002192 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002193 if (ctxt->instate == XML_PARSER_EOF)
2194 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002195 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002196 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002197}
2198
2199/**
2200 * xmlParseCharRef:
2201 * @ctxt: an XML parser context
2202 *
2203 * parse Reference declarations
2204 *
2205 * [66] CharRef ::= '&#' [0-9]+ ';' |
2206 * '&#x' [0-9a-fA-F]+ ';'
2207 *
2208 * [ WFC: Legal Character ]
2209 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002210 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002211 *
2212 * Returns the value parsed (as an int), 0 in case of error
2213 */
2214int
2215xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002216 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002217 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002218 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002219
Owen Taylor3473f882001-02-23 17:55:21 +00002220 /*
2221 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2222 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002223 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002224 (NXT(2) == 'x')) {
2225 SKIP(3);
2226 GROW;
2227 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002228 if (count++ > 20) {
2229 count = 0;
2230 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002231 if (ctxt->instate == XML_PARSER_EOF)
2232 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002233 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002234 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002235 val = val * 16 + (CUR - '0');
2236 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237 val = val * 16 + (CUR - 'a') + 10;
2238 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239 val = val * 16 + (CUR - 'A') + 10;
2240 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002241 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 val = 0;
2243 break;
2244 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002245 if (val > 0x10FFFF)
2246 outofrange = val;
2247
Owen Taylor3473f882001-02-23 17:55:21 +00002248 NEXT;
2249 count++;
2250 }
2251 if (RAW == ';') {
2252 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002253 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002254 ctxt->nbChars ++;
2255 ctxt->input->cur++;
2256 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002257 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002258 SKIP(2);
2259 GROW;
2260 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002261 if (count++ > 20) {
2262 count = 0;
2263 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002264 if (ctxt->instate == XML_PARSER_EOF)
2265 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002266 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002267 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002268 val = val * 10 + (CUR - '0');
2269 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002271 val = 0;
2272 break;
2273 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002274 if (val > 0x10FFFF)
2275 outofrange = val;
2276
Owen Taylor3473f882001-02-23 17:55:21 +00002277 NEXT;
2278 count++;
2279 }
2280 if (RAW == ';') {
2281 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002282 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002283 ctxt->nbChars ++;
2284 ctxt->input->cur++;
2285 }
2286 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002287 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002288 }
2289
2290 /*
2291 * [ WFC: Legal Character ]
2292 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002293 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002294 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002295 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return(val);
2297 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002298 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299 "xmlParseCharRef: invalid xmlChar value %d\n",
2300 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002301 }
2302 return(0);
2303}
2304
2305/**
2306 * xmlParseStringCharRef:
2307 * @ctxt: an XML parser context
2308 * @str: a pointer to an index in the string
2309 *
2310 * parse Reference declarations, variant parsing from a string rather
2311 * than an an input flow.
2312 *
2313 * [66] CharRef ::= '&#' [0-9]+ ';' |
2314 * '&#x' [0-9a-fA-F]+ ';'
2315 *
2316 * [ WFC: Legal Character ]
2317 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002318 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002319 *
2320 * Returns the value parsed (as an int), 0 in case of error, str will be
2321 * updated to the current value of the index
2322 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002323static int
Owen Taylor3473f882001-02-23 17:55:21 +00002324xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2325 const xmlChar *ptr;
2326 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002327 unsigned int val = 0;
2328 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002329
2330 if ((str == NULL) || (*str == NULL)) return(0);
2331 ptr = *str;
2332 cur = *ptr;
2333 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2334 ptr += 3;
2335 cur = *ptr;
2336 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002337 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002338 val = val * 16 + (cur - '0');
2339 else if ((cur >= 'a') && (cur <= 'f'))
2340 val = val * 16 + (cur - 'a') + 10;
2341 else if ((cur >= 'A') && (cur <= 'F'))
2342 val = val * 16 + (cur - 'A') + 10;
2343 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002344 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002345 val = 0;
2346 break;
2347 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002348 if (val > 0x10FFFF)
2349 outofrange = val;
2350
Owen Taylor3473f882001-02-23 17:55:21 +00002351 ptr++;
2352 cur = *ptr;
2353 }
2354 if (cur == ';')
2355 ptr++;
2356 } else if ((cur == '&') && (ptr[1] == '#')){
2357 ptr += 2;
2358 cur = *ptr;
2359 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002360 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002361 val = val * 10 + (cur - '0');
2362 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002363 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002364 val = 0;
2365 break;
2366 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002367 if (val > 0x10FFFF)
2368 outofrange = val;
2369
Owen Taylor3473f882001-02-23 17:55:21 +00002370 ptr++;
2371 cur = *ptr;
2372 }
2373 if (cur == ';')
2374 ptr++;
2375 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002376 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002377 return(0);
2378 }
2379 *str = ptr;
2380
2381 /*
2382 * [ WFC: Legal Character ]
2383 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002384 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002385 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002386 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002387 return(val);
2388 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002389 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2391 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002392 }
2393 return(0);
2394}
2395
2396/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002397 * xmlNewBlanksWrapperInputStream:
2398 * @ctxt: an XML parser context
2399 * @entity: an Entity pointer
2400 *
2401 * Create a new input stream for wrapping
2402 * blanks around a PEReference
2403 *
2404 * Returns the new input stream or NULL
2405 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002406
Daniel Veillardf5582f12002-06-11 10:08:16 +00002407static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002408
Daniel Veillardf4862f02002-09-10 11:13:43 +00002409static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002410xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411 xmlParserInputPtr input;
2412 xmlChar *buffer;
2413 size_t length;
2414 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002417 return(NULL);
2418 }
2419 if (xmlParserDebugEntities)
2420 xmlGenericError(xmlGenericErrorContext,
2421 "new blanks wrapper for entity: %s\n", entity->name);
2422 input = xmlNewInputStream(ctxt);
2423 if (input == NULL) {
2424 return(NULL);
2425 }
2426 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002427 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002428 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002429 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002430 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002431 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002432 }
2433 buffer [0] = ' ';
2434 buffer [1] = '%';
2435 buffer [length-3] = ';';
2436 buffer [length-2] = ' ';
2437 buffer [length-1] = 0;
2438 memcpy(buffer + 2, entity->name, length - 5);
2439 input->free = deallocblankswrapper;
2440 input->base = buffer;
2441 input->cur = buffer;
2442 input->length = length;
2443 input->end = &buffer[length];
2444 return(input);
2445}
2446
2447/**
Owen Taylor3473f882001-02-23 17:55:21 +00002448 * xmlParserHandlePEReference:
2449 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002450 *
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * [69] PEReference ::= '%' Name ';'
2452 *
2453 * [ WFC: No Recursion ]
2454 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002455 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002456 *
2457 * [ WFC: Entity Declared ]
2458 * In a document without any DTD, a document with only an internal DTD
2459 * subset which contains no parameter entity references, or a document
2460 * with "standalone='yes'", ... ... The declaration of a parameter
2461 * entity must precede any reference to it...
2462 *
2463 * [ VC: Entity Declared ]
2464 * In a document with an external subset or external parameter entities
2465 * with "standalone='no'", ... ... The declaration of a parameter entity
2466 * must precede any reference to it...
2467 *
2468 * [ WFC: In DTD ]
2469 * Parameter-entity references may only appear in the DTD.
2470 * NOTE: misleading but this is handled.
2471 *
2472 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002473 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002474 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002475 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002476 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002477 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002478 */
2479void
2480xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002481 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002482 xmlEntityPtr entity = NULL;
2483 xmlParserInputPtr input;
2484
Owen Taylor3473f882001-02-23 17:55:21 +00002485 if (RAW != '%') return;
2486 switch(ctxt->instate) {
2487 case XML_PARSER_CDATA_SECTION:
2488 return;
2489 case XML_PARSER_COMMENT:
2490 return;
2491 case XML_PARSER_START_TAG:
2492 return;
2493 case XML_PARSER_END_TAG:
2494 return;
2495 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002496 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 return;
2498 case XML_PARSER_PROLOG:
2499 case XML_PARSER_START:
2500 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002501 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002502 return;
2503 case XML_PARSER_ENTITY_DECL:
2504 case XML_PARSER_CONTENT:
2505 case XML_PARSER_ATTRIBUTE_VALUE:
2506 case XML_PARSER_PI:
2507 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002508 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002509 /* we just ignore it there */
2510 return;
2511 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002512 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002513 return;
2514 case XML_PARSER_ENTITY_VALUE:
2515 /*
2516 * NOTE: in the case of entity values, we don't do the
2517 * substitution here since we need the literal
2518 * entity value to be able to save the internal
2519 * subset of the document.
2520 * This will be handled by xmlStringDecodeEntities
2521 */
2522 return;
2523 case XML_PARSER_DTD:
2524 /*
2525 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526 * In the internal DTD subset, parameter-entity references
2527 * can occur only where markup declarations can occur, not
2528 * within markup declarations.
2529 * In that case this is handled in xmlParseMarkupDecl
2530 */
2531 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2532 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002533 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002534 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002535 break;
2536 case XML_PARSER_IGNORE:
2537 return;
2538 }
2539
2540 NEXT;
2541 name = xmlParseName(ctxt);
2542 if (xmlParserDebugEntities)
2543 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002544 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002545 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002546 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002547 } else {
2548 if (RAW == ';') {
2549 NEXT;
2550 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002552 if (ctxt->instate == XML_PARSER_EOF)
2553 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002554 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002555
Owen Taylor3473f882001-02-23 17:55:21 +00002556 /*
2557 * [ WFC: Entity Declared ]
2558 * In a document without any DTD, a document with only an
2559 * internal DTD subset which contains no parameter entity
2560 * references, or a document with "standalone='yes'", ...
2561 * ... The declaration of a parameter entity must precede
2562 * any reference to it...
2563 */
2564 if ((ctxt->standalone == 1) ||
2565 ((ctxt->hasExternalSubset == 0) &&
2566 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002567 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002568 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002569 } else {
2570 /*
2571 * [ VC: Entity Declared ]
2572 * In a document with an external subset or external
2573 * parameter entities with "standalone='no'", ...
2574 * ... The declaration of a parameter entity must precede
2575 * any reference to it...
2576 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002577 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2578 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2579 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002580 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002581 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002582 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2583 "PEReference: %%%s; not found\n",
2584 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002585 ctxt->valid = 0;
2586 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002587 } else if (ctxt->input->free != deallocblankswrapper) {
2588 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002589 if (xmlPushInput(ctxt, input) < 0)
2590 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002591 } else {
2592 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2593 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002594 xmlChar start[4];
2595 xmlCharEncoding enc;
2596
Owen Taylor3473f882001-02-23 17:55:21 +00002597 /*
2598 * handle the extra spaces added before and after
2599 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002600 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002601 */
2602 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002603 if (xmlPushInput(ctxt, input) < 0)
2604 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002605
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002606 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002607 * Get the 4 first bytes and decode the charset
2608 * if enc != XML_CHAR_ENCODING_NONE
2609 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002610 * Note that, since we may have some non-UTF8
2611 * encoding (like UTF16, bug 135229), the 'length'
2612 * is not known, but we can calculate based upon
2613 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002614 */
2615 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002616 if (ctxt->instate == XML_PARSER_EOF)
2617 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002618 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002619 start[0] = RAW;
2620 start[1] = NXT(1);
2621 start[2] = NXT(2);
2622 start[3] = NXT(3);
2623 enc = xmlDetectCharEncoding(start, 4);
2624 if (enc != XML_CHAR_ENCODING_NONE) {
2625 xmlSwitchEncoding(ctxt, enc);
2626 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002627 }
2628
Owen Taylor3473f882001-02-23 17:55:21 +00002629 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002630 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2631 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002632 xmlParseTextDecl(ctxt);
2633 }
Owen Taylor3473f882001-02-23 17:55:21 +00002634 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002635 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2636 "PEReference: %s is not a parameter entity\n",
2637 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002638 }
2639 }
2640 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002641 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002642 }
Owen Taylor3473f882001-02-23 17:55:21 +00002643 }
2644}
2645
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable holding the buffer
 * @n:  number of extra bytes wanted on top of doubling the buffer
 *
 * Macro used to grow the current buffer to buffer##_size * 2 + (n) bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also reached when the new size overflows. In both cases buffer and
 * buffer##_size are left untouched.
 * @n is parenthesized so that any expression can safely be passed.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2660
2661/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002662 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002663 * @ctxt: the parser context
2664 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002665 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002666 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2667 * @end: an end marker xmlChar, 0 if none
2668 * @end2: an end marker xmlChar, 0 if none
2669 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002670 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002671 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002672 *
2673 * [67] Reference ::= EntityRef | CharRef
2674 *
2675 * [69] PEReference ::= '%' Name ';'
2676 *
2677 * Returns A newly allocated string with the substitution done. The caller
2678 * must deallocate it !
2679 */
2680xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002681xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2682 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002683 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002684 size_t buffer_size = 0;
2685 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002686
2687 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002688 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002689 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 xmlEntityPtr ent;
2691 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002692
Daniel Veillarda82b1822004-11-08 16:24:57 +00002693 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002694 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002695 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002696
Daniel Veillard0161e632008-08-28 15:36:32 +00002697 if (((ctxt->depth > 40) &&
2698 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2699 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002700 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002701 return(NULL);
2702 }
2703
2704 /*
2705 * allocate a translation buffer.
2706 */
2707 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002708 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002709 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002710
2711 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002712 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002713 * we are operating on already parsed values.
2714 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002715 if (str < last)
2716 c = CUR_SCHAR(str, l);
2717 else
2718 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002719 while ((c != 0) && (c != end) && /* non input consuming loop */
2720 (c != end2) && (c != end3)) {
2721
2722 if (c == 0) break;
2723 if ((c == '&') && (str[1] == '#')) {
2724 int val = xmlParseStringCharRef(ctxt, &str);
2725 if (val != 0) {
2726 COPY_BUF(0,buffer,nbchars,val);
2727 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002728 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002729 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002730 }
Owen Taylor3473f882001-02-23 17:55:21 +00002731 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2732 if (xmlParserDebugEntities)
2733 xmlGenericError(xmlGenericErrorContext,
2734 "String decoding Entity Reference: %.30s\n",
2735 str);
2736 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002737 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2738 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002739 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002740 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002741 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002742 if ((ent != NULL) &&
2743 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2744 if (ent->content != NULL) {
2745 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002746 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002747 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002748 }
Owen Taylor3473f882001-02-23 17:55:21 +00002749 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002750 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2751 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002752 }
2753 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002754 ctxt->depth++;
2755 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2756 0, 0, 0);
2757 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002758
Owen Taylor3473f882001-02-23 17:55:21 +00002759 if (rep != NULL) {
2760 current = rep;
2761 while (*current != 0) { /* non input consuming loop */
2762 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002763 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002764 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002765 goto int_error;
2766 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002767 }
2768 }
2769 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002770 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002771 }
2772 } else if (ent != NULL) {
2773 int i = xmlStrlen(ent->name);
2774 const xmlChar *cur = ent->name;
2775
2776 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002777 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002778 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002779 }
2780 for (;i > 0;i--)
2781 buffer[nbchars++] = *cur++;
2782 buffer[nbchars++] = ';';
2783 }
2784 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2785 if (xmlParserDebugEntities)
2786 xmlGenericError(xmlGenericErrorContext,
2787 "String decoding PE Reference: %.30s\n", str);
2788 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002789 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2790 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002791 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002792 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002793 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002795 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002796 }
Owen Taylor3473f882001-02-23 17:55:21 +00002797 ctxt->depth++;
2798 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2799 0, 0, 0);
2800 ctxt->depth--;
2801 if (rep != NULL) {
2802 current = rep;
2803 while (*current != 0) { /* non input consuming loop */
2804 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002805 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002806 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002807 goto int_error;
2808 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002809 }
2810 }
2811 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002812 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002813 }
2814 }
2815 } else {
2816 COPY_BUF(l,buffer,nbchars,c);
2817 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002818 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2819 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002820 }
2821 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002822 if (str < last)
2823 c = CUR_SCHAR(str, l);
2824 else
2825 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002826 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002827 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002828 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002829
2830mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002831 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002832int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002833 if (rep != NULL)
2834 xmlFree(rep);
2835 if (buffer != NULL)
2836 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002837 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002838}
2839
Daniel Veillarde57ec792003-09-10 10:50:59 +00002840/**
2841 * xmlStringDecodeEntities:
2842 * @ctxt: the parser context
2843 * @str: the input string
2844 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2845 * @end: an end marker xmlChar, 0 if none
2846 * @end2: an end marker xmlChar, 0 if none
2847 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002848 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002849 * Takes a entity string content and process to do the adequate substitutions.
2850 *
2851 * [67] Reference ::= EntityRef | CharRef
2852 *
2853 * [69] PEReference ::= '%' Name ';'
2854 *
2855 * Returns A newly allocated string with the substitution done. The caller
2856 * must deallocate it !
2857 */
2858xmlChar *
2859xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2860 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002861 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002862 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2863 end, end2, end3));
2864}
Owen Taylor3473f882001-02-23 17:55:21 +00002865
2866/************************************************************************
2867 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002868 * Commodity functions, cleanup needed ? *
2869 * *
2870 ************************************************************************/
2871
2872/**
2873 * areBlanks:
2874 * @ctxt: an XML parser context
2875 * @str: a xmlChar *
2876 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002877 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002878 *
2879 * Is this a sequence of blank chars that one can ignore ?
2880 *
2881 * Returns 1 if ignorable 0 otherwise.
2882 */
2883
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002884static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2885 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002886 int i, ret;
2887 xmlNodePtr lastChild;
2888
Daniel Veillard05c13a22001-09-09 08:38:09 +00002889 /*
2890 * Don't spend time trying to differentiate them, the same callback is
2891 * used !
2892 */
2893 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002894 return(0);
2895
Owen Taylor3473f882001-02-23 17:55:21 +00002896 /*
2897 * Check for xml:space value.
2898 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002899 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2900 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002901 return(0);
2902
2903 /*
2904 * Check that the string is made of blanks
2905 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002906 if (blank_chars == 0) {
2907 for (i = 0;i < len;i++)
2908 if (!(IS_BLANK_CH(str[i]))) return(0);
2909 }
Owen Taylor3473f882001-02-23 17:55:21 +00002910
2911 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002912 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002913 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002914 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002915 if (ctxt->myDoc != NULL) {
2916 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2917 if (ret == 0) return(1);
2918 if (ret == 1) return(0);
2919 }
2920
2921 /*
2922 * Otherwise, heuristic :-\
2923 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002924 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002925 if ((ctxt->node->children == NULL) &&
2926 (RAW == '<') && (NXT(1) == '/')) return(0);
2927
2928 lastChild = xmlGetLastChild(ctxt->node);
2929 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002930 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2931 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002932 } else if (xmlNodeIsText(lastChild))
2933 return(0);
2934 else if ((ctxt->node->children != NULL) &&
2935 (xmlNodeIsText(ctxt->node->children)))
2936 return(0);
2937 return(1);
2938}
2939
Owen Taylor3473f882001-02-23 17:55:21 +00002940/************************************************************************
2941 * *
2942 * Extra stuff for namespace support *
2943 * Relates to http://www.w3.org/TR/WD-xml-names *
2944 * *
2945 ************************************************************************/
2946
2947/**
2948 * xmlSplitQName:
2949 * @ctxt: an XML parser context
2950 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002951 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002952 *
2953 * parse an UTF8 encoded XML qualified name string
2954 *
2955 * [NS 5] QName ::= (Prefix ':')? LocalPart
2956 *
2957 * [NS 6] Prefix ::= NCName
2958 *
2959 * [NS 7] LocalPart ::= NCName
2960 *
2961 * Returns the local part, and prefix is updated
2962 * to get the Prefix if any.
2963 */
2964
2965xmlChar *
2966xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2967 xmlChar buf[XML_MAX_NAMELEN + 5];
2968 xmlChar *buffer = NULL;
2969 int len = 0;
2970 int max = XML_MAX_NAMELEN;
2971 xmlChar *ret = NULL;
2972 const xmlChar *cur = name;
2973 int c;
2974
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002975 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002976 *prefix = NULL;
2977
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002978 if (cur == NULL) return(NULL);
2979
Owen Taylor3473f882001-02-23 17:55:21 +00002980#ifndef XML_XML_NAMESPACE
2981 /* xml: prefix is not really a namespace */
2982 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2983 (cur[2] == 'l') && (cur[3] == ':'))
2984 return(xmlStrdup(name));
2985#endif
2986
Daniel Veillard597bc482003-07-24 16:08:28 +00002987 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002988 if (cur[0] == ':')
2989 return(xmlStrdup(name));
2990
2991 c = *cur++;
2992 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2993 buf[len++] = c;
2994 c = *cur++;
2995 }
2996 if (len >= max) {
2997 /*
2998 * Okay someone managed to make a huge name, so he's ready to pay
2999 * for the processing speed.
3000 */
3001 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003002
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003006 return(NULL);
3007 }
3008 memcpy(buffer, buf, len);
3009 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3010 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003011 xmlChar *tmp;
3012
Owen Taylor3473f882001-02-23 17:55:21 +00003013 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003014 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003015 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003016 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003017 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003018 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003019 return(NULL);
3020 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003021 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003022 }
3023 buffer[len++] = c;
3024 c = *cur++;
3025 }
3026 buffer[len] = 0;
3027 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003028
Daniel Veillard597bc482003-07-24 16:08:28 +00003029 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003030 if (buffer != NULL)
3031 xmlFree(buffer);
3032 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003033 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003034 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003035
Owen Taylor3473f882001-02-23 17:55:21 +00003036 if (buffer == NULL)
3037 ret = xmlStrndup(buf, len);
3038 else {
3039 ret = buffer;
3040 buffer = NULL;
3041 max = XML_MAX_NAMELEN;
3042 }
3043
3044
3045 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003046 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003047 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003048 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003049 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003050 }
Owen Taylor3473f882001-02-23 17:55:21 +00003051 len = 0;
3052
Daniel Veillardbb284f42002-10-16 18:02:47 +00003053 /*
3054 * Check that the first character is proper to start
3055 * a new name
3056 */
3057 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3058 ((c >= 0x41) && (c <= 0x5A)) ||
3059 (c == '_') || (c == ':'))) {
3060 int l;
3061 int first = CUR_SCHAR(cur, l);
3062
3063 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003064 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003065 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003066 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003067 }
3068 }
3069 cur++;
3070
Owen Taylor3473f882001-02-23 17:55:21 +00003071 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3072 buf[len++] = c;
3073 c = *cur++;
3074 }
3075 if (len >= max) {
3076 /*
3077 * Okay someone managed to make a huge name, so he's ready to pay
3078 * for the processing speed.
3079 */
3080 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003081
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003082 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003083 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003084 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003085 return(NULL);
3086 }
3087 memcpy(buffer, buf, len);
3088 while (c != 0) { /* tested bigname2.xml */
3089 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003090 xmlChar *tmp;
3091
Owen Taylor3473f882001-02-23 17:55:21 +00003092 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003093 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003094 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003095 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003096 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003097 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003098 return(NULL);
3099 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003100 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003101 }
3102 buffer[len++] = c;
3103 c = *cur++;
3104 }
3105 buffer[len] = 0;
3106 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003107
Owen Taylor3473f882001-02-23 17:55:21 +00003108 if (buffer == NULL)
3109 ret = xmlStrndup(buf, len);
3110 else {
3111 ret = buffer;
3112 }
3113 }
3114
3115 return(ret);
3116}
3117
3118/************************************************************************
3119 * *
3120 * The parser itself *
3121 * Relates to http://www.w3.org/TR/REC-xml *
3122 * *
3123 ************************************************************************/
3124
Daniel Veillard34e3f642008-07-29 09:02:27 +00003125/************************************************************************
3126 * *
3127 * Routines to parse Name, NCName and NmToken *
3128 * *
3129 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Each one is bumped on
 * entry of the parsing routine it is named after (xmlParseName,
 * xmlParseNameComplex, xmlParseNCName, xmlParseNCNameComplex,
 * xmlParseStringName and xmlParseNmtoken). Objects with static storage
 * duration start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNameComplex;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseStringName;
static unsigned long nbParseNmToken;
#endif
3138
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139/*
3140 * The two following functions are related to the change of accepted
3141 * characters for Name and NmToken in the Revision 5 of XML-1.0
3142 * They correspond to the modified production [4] and the new production [4a]
3143 * changes in that revision. Also note that the macros used for the
3144 * productions Letter, Digit, CombiningChar and Extender are not needed
3145 * anymore.
3146 * We still keep compatibility to pre-revision5 parsing semantic if the
3147 * new XML_PARSE_OLD10 option is given to the parser.
3148 */
3149static int
3150xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3151 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3152 /*
3153 * Use the new checks of production [4] [4a] amd [5] of the
3154 * Update 5 of XML-1.0
3155 */
3156 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3157 (((c >= 'a') && (c <= 'z')) ||
3158 ((c >= 'A') && (c <= 'Z')) ||
3159 (c == '_') || (c == ':') ||
3160 ((c >= 0xC0) && (c <= 0xD6)) ||
3161 ((c >= 0xD8) && (c <= 0xF6)) ||
3162 ((c >= 0xF8) && (c <= 0x2FF)) ||
3163 ((c >= 0x370) && (c <= 0x37D)) ||
3164 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3165 ((c >= 0x200C) && (c <= 0x200D)) ||
3166 ((c >= 0x2070) && (c <= 0x218F)) ||
3167 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3168 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3169 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3170 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3171 ((c >= 0x10000) && (c <= 0xEFFFF))))
3172 return(1);
3173 } else {
3174 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3175 return(1);
3176 }
3177 return(0);
3178}
3179
3180static int
3181xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3182 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3183 /*
3184 * Use the new checks of production [4] [4a] amd [5] of the
3185 * Update 5 of XML-1.0
3186 */
3187 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3188 (((c >= 'a') && (c <= 'z')) ||
3189 ((c >= 'A') && (c <= 'Z')) ||
3190 ((c >= '0') && (c <= '9')) || /* !start */
3191 (c == '_') || (c == ':') ||
3192 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3193 ((c >= 0xC0) && (c <= 0xD6)) ||
3194 ((c >= 0xD8) && (c <= 0xF6)) ||
3195 ((c >= 0xF8) && (c <= 0x2FF)) ||
3196 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3197 ((c >= 0x370) && (c <= 0x37D)) ||
3198 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3199 ((c >= 0x200C) && (c <= 0x200D)) ||
3200 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3201 ((c >= 0x2070) && (c <= 0x218F)) ||
3202 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3204 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3205 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206 ((c >= 0x10000) && (c <= 0xEFFFF))))
3207 return(1);
3208 } else {
3209 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3210 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003211 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003212 (IS_COMBINING(c)) ||
3213 (IS_EXTENDER(c)))
3214 return(1);
3215 }
3216 return(0);
3217}
3218
Daniel Veillarde57ec792003-09-10 10:50:59 +00003219static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003221
Daniel Veillard34e3f642008-07-29 09:02:27 +00003222static const xmlChar *
3223xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3224 int len = 0, l;
3225 int c;
3226 int count = 0;
3227
Daniel Veillardc6561462009-03-25 10:22:31 +00003228#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003229 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003230#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003231
3232 /*
3233 * Handler for more complex cases
3234 */
3235 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003236 if (ctxt->instate == XML_PARSER_EOF)
3237 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003238 c = CUR_CHAR(l);
3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3240 /*
3241 * Use the new checks of production [4] [4a] amd [5] of the
3242 * Update 5 of XML-1.0
3243 */
3244 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3245 (!(((c >= 'a') && (c <= 'z')) ||
3246 ((c >= 'A') && (c <= 'Z')) ||
3247 (c == '_') || (c == ':') ||
3248 ((c >= 0xC0) && (c <= 0xD6)) ||
3249 ((c >= 0xD8) && (c <= 0xF6)) ||
3250 ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 ((c >= 0x370) && (c <= 0x37D)) ||
3252 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 ((c >= 0x200C) && (c <= 0x200D)) ||
3254 ((c >= 0x2070) && (c <= 0x218F)) ||
3255 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3260 return(NULL);
3261 }
3262 len += l;
3263 NEXTL(l);
3264 c = CUR_CHAR(l);
3265 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3266 (((c >= 'a') && (c <= 'z')) ||
3267 ((c >= 'A') && (c <= 'Z')) ||
3268 ((c >= '0') && (c <= '9')) || /* !start */
3269 (c == '_') || (c == ':') ||
3270 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3271 ((c >= 0xC0) && (c <= 0xD6)) ||
3272 ((c >= 0xD8) && (c <= 0xF6)) ||
3273 ((c >= 0xF8) && (c <= 0x2FF)) ||
3274 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3275 ((c >= 0x370) && (c <= 0x37D)) ||
3276 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3277 ((c >= 0x200C) && (c <= 0x200D)) ||
3278 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3279 ((c >= 0x2070) && (c <= 0x218F)) ||
3280 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3281 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3282 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3283 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3284 ((c >= 0x10000) && (c <= 0xEFFFF))
3285 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003286 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003287 count = 0;
3288 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003289 if (ctxt->instate == XML_PARSER_EOF)
3290 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291 }
3292 len += l;
3293 NEXTL(l);
3294 c = CUR_CHAR(l);
3295 }
3296 } else {
3297 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3298 (!IS_LETTER(c) && (c != '_') &&
3299 (c != ':'))) {
3300 return(NULL);
3301 }
3302 len += l;
3303 NEXTL(l);
3304 c = CUR_CHAR(l);
3305
3306 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3307 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3308 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003309 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003310 (IS_COMBINING(c)) ||
3311 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003312 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003313 count = 0;
3314 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003315 if (ctxt->instate == XML_PARSER_EOF)
3316 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003317 }
3318 len += l;
3319 NEXTL(l);
3320 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003321 if (c == 0) {
3322 count = 0;
3323 GROW;
3324 if (ctxt->instate == XML_PARSER_EOF)
3325 return(NULL);
3326 c = CUR_CHAR(l);
3327 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003328 }
3329 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003330 if ((len > XML_MAX_NAME_LENGTH) &&
3331 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3332 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3333 return(NULL);
3334 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003335 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3336 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3337 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3338}
3339
Owen Taylor3473f882001-02-23 17:55:21 +00003340/**
3341 * xmlParseName:
3342 * @ctxt: an XML parser context
3343 *
3344 * parse an XML name.
3345 *
3346 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3347 * CombiningChar | Extender
3348 *
3349 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3350 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003351 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003352 *
3353 * Returns the Name parsed or NULL
3354 */
3355
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003356const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003357xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003358 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003359 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003360 int count = 0;
3361
3362 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003363
Daniel Veillardc6561462009-03-25 10:22:31 +00003364#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003365 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003366#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003367
Daniel Veillard48b2f892001-02-25 16:11:03 +00003368 /*
3369 * Accelerator for simple ASCII names
3370 */
3371 in = ctxt->input->cur;
3372 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 ((*in >= 0x41) && (*in <= 0x5A)) ||
3374 (*in == '_') || (*in == ':')) {
3375 in++;
3376 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003379 (*in == '_') || (*in == '-') ||
3380 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003381 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003382 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003383 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003384 if ((count > XML_MAX_NAME_LENGTH) &&
3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3387 return(NULL);
3388 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003389 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003390 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003391 ctxt->nbChars += count;
3392 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003393 if (ret == NULL)
3394 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003395 return(ret);
3396 }
3397 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003398 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003399 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003400}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003401
Daniel Veillard34e3f642008-07-29 09:02:27 +00003402static const xmlChar *
3403xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3404 int len = 0, l;
3405 int c;
3406 int count = 0;
3407
Daniel Veillardc6561462009-03-25 10:22:31 +00003408#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003409 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003410#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003411
3412 /*
3413 * Handler for more complex cases
3414 */
3415 GROW;
3416 c = CUR_CHAR(l);
3417 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3418 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3419 return(NULL);
3420 }
3421
3422 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3423 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003424 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003425 if ((len > XML_MAX_NAME_LENGTH) &&
3426 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3427 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3428 return(NULL);
3429 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003430 count = 0;
3431 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003432 if (ctxt->instate == XML_PARSER_EOF)
3433 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003434 }
3435 len += l;
3436 NEXTL(l);
3437 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003438 if (c == 0) {
3439 count = 0;
3440 GROW;
3441 if (ctxt->instate == XML_PARSER_EOF)
3442 return(NULL);
3443 c = CUR_CHAR(l);
3444 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003445 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003446 if ((len > XML_MAX_NAME_LENGTH) &&
3447 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3448 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3449 return(NULL);
3450 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003451 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3452}
3453
3454/**
3455 * xmlParseNCName:
3456 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003457 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003458 *
3459 * parse an XML name.
3460 *
3461 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3462 * CombiningChar | Extender
3463 *
3464 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3465 *
3466 * Returns the Name parsed or NULL
3467 */
3468
3469static const xmlChar *
3470xmlParseNCName(xmlParserCtxtPtr ctxt) {
3471 const xmlChar *in;
3472 const xmlChar *ret;
3473 int count = 0;
3474
Daniel Veillardc6561462009-03-25 10:22:31 +00003475#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003476 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003477#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478
3479 /*
3480 * Accelerator for simple ASCII names
3481 */
3482 in = ctxt->input->cur;
3483 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3484 ((*in >= 0x41) && (*in <= 0x5A)) ||
3485 (*in == '_')) {
3486 in++;
3487 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3488 ((*in >= 0x41) && (*in <= 0x5A)) ||
3489 ((*in >= 0x30) && (*in <= 0x39)) ||
3490 (*in == '_') || (*in == '-') ||
3491 (*in == '.'))
3492 in++;
3493 if ((*in > 0) && (*in < 0x80)) {
3494 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003495 if ((count > XML_MAX_NAME_LENGTH) &&
3496 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3497 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3498 return(NULL);
3499 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003500 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501 ctxt->input->cur = in;
3502 ctxt->nbChars += count;
3503 ctxt->input->col += count;
3504 if (ret == NULL) {
3505 xmlErrMemory(ctxt, NULL);
3506 }
3507 return(ret);
3508 }
3509 }
3510 return(xmlParseNCNameComplex(ctxt));
3511}
3512
Daniel Veillard46de64e2002-05-29 08:21:33 +00003513/**
3514 * xmlParseNameAndCompare:
3515 * @ctxt: an XML parser context
3516 *
3517 * parse an XML name and compares for match
3518 * (specialized for endtag parsing)
3519 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003520 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521 * and the name for mismatch
3522 */
3523
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003524static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003525xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003526 register const xmlChar *cmp = other;
3527 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003528 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003529
3530 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003531 if (ctxt->instate == XML_PARSER_EOF)
3532 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003533
Daniel Veillard46de64e2002-05-29 08:21:33 +00003534 in = ctxt->input->cur;
3535 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003536 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003537 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003538 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003539 }
William M. Brack76e95df2003-10-18 16:20:14 +00003540 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003541 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003542 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003543 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003544 }
3545 /* failure (or end of input buffer), check with full function */
3546 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003547 /* strings coming from the dictionnary direct compare possible */
3548 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003549 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003550 }
3551 return ret;
3552}
3553
Owen Taylor3473f882001-02-23 17:55:21 +00003554/**
3555 * xmlParseStringName:
3556 * @ctxt: an XML parser context
3557 * @str: a pointer to the string pointer (IN/OUT)
3558 *
3559 * parse an XML name.
3560 *
3561 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562 * CombiningChar | Extender
3563 *
3564 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3565 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003566 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003567 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003568 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003569 * is updated to the current location in the string.
3570 */
3571
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003572static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003573xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574 xmlChar buf[XML_MAX_NAMELEN + 5];
3575 const xmlChar *cur = *str;
3576 int len = 0, l;
3577 int c;
3578
Daniel Veillardc6561462009-03-25 10:22:31 +00003579#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003580 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003581#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003582
Owen Taylor3473f882001-02-23 17:55:21 +00003583 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003584 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003585 return(NULL);
3586 }
3587
Daniel Veillard34e3f642008-07-29 09:02:27 +00003588 COPY_BUF(l,buf,len,c);
3589 cur += l;
3590 c = CUR_SCHAR(cur, l);
3591 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003592 COPY_BUF(l,buf,len,c);
3593 cur += l;
3594 c = CUR_SCHAR(cur, l);
3595 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3596 /*
3597 * Okay someone managed to make a huge name, so he's ready to pay
3598 * for the processing speed.
3599 */
3600 xmlChar *buffer;
3601 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003602
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003603 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003604 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003605 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003606 return(NULL);
3607 }
3608 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003609 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003610 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003611 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003612
3613 if ((len > XML_MAX_NAME_LENGTH) &&
3614 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3615 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3616 xmlFree(buffer);
3617 return(NULL);
3618 }
Owen Taylor3473f882001-02-23 17:55:21 +00003619 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003620 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003621 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003622 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003623 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003624 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003625 return(NULL);
3626 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003627 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003628 }
3629 COPY_BUF(l,buffer,len,c);
3630 cur += l;
3631 c = CUR_SCHAR(cur, l);
3632 }
3633 buffer[len] = 0;
3634 *str = cur;
3635 return(buffer);
3636 }
3637 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003638 if ((len > XML_MAX_NAME_LENGTH) &&
3639 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3640 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3641 return(NULL);
3642 }
Owen Taylor3473f882001-02-23 17:55:21 +00003643 *str = cur;
3644 return(xmlStrndup(buf, len));
3645}
3646
3647/**
3648 * xmlParseNmtoken:
3649 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003650 *
Owen Taylor3473f882001-02-23 17:55:21 +00003651 * parse an XML Nmtoken.
3652 *
3653 * [7] Nmtoken ::= (NameChar)+
3654 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003655 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003656 *
3657 * Returns the Nmtoken parsed or NULL
3658 */
3659
3660xmlChar *
3661xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3662 xmlChar buf[XML_MAX_NAMELEN + 5];
3663 int len = 0, l;
3664 int c;
3665 int count = 0;
3666
Daniel Veillardc6561462009-03-25 10:22:31 +00003667#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003668 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003669#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003670
Owen Taylor3473f882001-02-23 17:55:21 +00003671 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003672 if (ctxt->instate == XML_PARSER_EOF)
3673 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003674 c = CUR_CHAR(l);
3675
Daniel Veillard34e3f642008-07-29 09:02:27 +00003676 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003677 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003678 count = 0;
3679 GROW;
3680 }
3681 COPY_BUF(l,buf,len,c);
3682 NEXTL(l);
3683 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003684 if (c == 0) {
3685 count = 0;
3686 GROW;
3687 if (ctxt->instate == XML_PARSER_EOF)
3688 return(NULL);
3689 c = CUR_CHAR(l);
3690 }
Owen Taylor3473f882001-02-23 17:55:21 +00003691 if (len >= XML_MAX_NAMELEN) {
3692 /*
3693 * Okay someone managed to make a huge token, so he's ready to pay
3694 * for the processing speed.
3695 */
3696 xmlChar *buffer;
3697 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003698
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003699 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003700 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003701 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003702 return(NULL);
3703 }
3704 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003705 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003706 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003707 count = 0;
3708 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003709 if (ctxt->instate == XML_PARSER_EOF) {
3710 xmlFree(buffer);
3711 return(NULL);
3712 }
Owen Taylor3473f882001-02-23 17:55:21 +00003713 }
3714 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003715 xmlChar *tmp;
3716
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003717 if ((max > XML_MAX_NAME_LENGTH) &&
3718 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3719 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3720 xmlFree(buffer);
3721 return(NULL);
3722 }
Owen Taylor3473f882001-02-23 17:55:21 +00003723 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003724 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003725 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003726 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003727 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003728 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003729 return(NULL);
3730 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003731 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003732 }
3733 COPY_BUF(l,buffer,len,c);
3734 NEXTL(l);
3735 c = CUR_CHAR(l);
3736 }
3737 buffer[len] = 0;
3738 return(buffer);
3739 }
3740 }
3741 if (len == 0)
3742 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003743 if ((len > XML_MAX_NAME_LENGTH) &&
3744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3745 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3746 return(NULL);
3747 }
Owen Taylor3473f882001-02-23 17:55:21 +00003748 return(xmlStrndup(buf, len));
3749}
3750
3751/**
3752 * xmlParseEntityValue:
3753 * @ctxt: an XML parser context
3754 * @orig: if non-NULL store a copy of the original entity value
3755 *
3756 * parse a value for ENTITY declarations
3757 *
3758 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3759 * "'" ([^%&'] | PEReference | Reference)* "'"
3760 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003761 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003762 */
3763
3764xmlChar *
3765xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3766 xmlChar *buf = NULL;
3767 int len = 0;
3768 int size = XML_PARSER_BUFFER_SIZE;
3769 int c, l;
3770 xmlChar stop;
3771 xmlChar *ret = NULL;
3772 const xmlChar *cur = NULL;
3773 xmlParserInputPtr input;
3774
3775 if (RAW == '"') stop = '"';
3776 else if (RAW == '\'') stop = '\'';
3777 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003778 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003779 return(NULL);
3780 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003781 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003782 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003783 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 return(NULL);
3785 }
3786
3787 /*
3788 * The content of the entity definition is copied in a buffer.
3789 */
3790
3791 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3792 input = ctxt->input;
3793 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003794 if (ctxt->instate == XML_PARSER_EOF) {
3795 xmlFree(buf);
3796 return(NULL);
3797 }
Owen Taylor3473f882001-02-23 17:55:21 +00003798 NEXT;
3799 c = CUR_CHAR(l);
3800 /*
3801 * NOTE: 4.4.5 Included in Literal
3802 * When a parameter entity reference appears in a literal entity
3803 * value, ... a single or double quote character in the replacement
3804 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003805 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003806 * In practice it means we stop the loop only when back at parsing
3807 * the initial entity and the quote is found
3808 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003809 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3810 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003811 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003812 xmlChar *tmp;
3813
Owen Taylor3473f882001-02-23 17:55:21 +00003814 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003815 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3816 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003817 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003818 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003819 return(NULL);
3820 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003821 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003822 }
3823 COPY_BUF(l,buf,len,c);
3824 NEXTL(l);
3825 /*
3826 * Pop-up of finished entities.
3827 */
3828 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3829 xmlPopInput(ctxt);
3830
3831 GROW;
3832 c = CUR_CHAR(l);
3833 if (c == 0) {
3834 GROW;
3835 c = CUR_CHAR(l);
3836 }
3837 }
3838 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003839 if (ctxt->instate == XML_PARSER_EOF) {
3840 xmlFree(buf);
3841 return(NULL);
3842 }
Owen Taylor3473f882001-02-23 17:55:21 +00003843
3844 /*
3845 * Raise problem w.r.t. '&' and '%' being used in non-entities
3846 * reference constructs. Note Charref will be handled in
3847 * xmlStringDecodeEntities()
3848 */
3849 cur = buf;
3850 while (*cur != 0) { /* non input consuming */
3851 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3852 xmlChar *name;
3853 xmlChar tmp = *cur;
3854
3855 cur++;
3856 name = xmlParseStringName(ctxt, &cur);
3857 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003858 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003859 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003860 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003861 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003862 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3863 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003864 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003865 }
3866 if (name != NULL)
3867 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003868 if (*cur == 0)
3869 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003870 }
3871 cur++;
3872 }
3873
3874 /*
3875 * Then PEReference entities are substituted.
3876 */
3877 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003878 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003879 xmlFree(buf);
3880 } else {
3881 NEXT;
3882 /*
3883 * NOTE: 4.4.7 Bypassed
3884 * When a general entity reference appears in the EntityValue in
3885 * an entity declaration, it is bypassed and left as is.
3886 * so XML_SUBSTITUTE_REF is not set here.
3887 */
3888 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3889 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003890 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003891 *orig = buf;
3892 else
3893 xmlFree(buf);
3894 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003895
Owen Taylor3473f882001-02-23 17:55:21 +00003896 return(ret);
3897}
3898
3899/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003900 * xmlParseAttValueComplex:
3901 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003902 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003903 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003904 *
3905 * parse a value for an attribute, this is the fallback function
3906 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003907 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003908 *
3909 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3910 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003911static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003912xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003913 xmlChar limit = 0;
3914 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003915 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003916 size_t len = 0;
3917 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003918 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003919 xmlChar *current = NULL;
3920 xmlEntityPtr ent;
3921
Owen Taylor3473f882001-02-23 17:55:21 +00003922 if (NXT(0) == '"') {
3923 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3924 limit = '"';
3925 NEXT;
3926 } else if (NXT(0) == '\'') {
3927 limit = '\'';
3928 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3929 NEXT;
3930 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003931 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003932 return(NULL);
3933 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003934
Owen Taylor3473f882001-02-23 17:55:21 +00003935 /*
3936 * allocate a translation buffer.
3937 */
3938 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003939 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003940 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003941
3942 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003943 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003944 */
3945 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003946 while (((NXT(0) != limit) && /* checked */
3947 (IS_CHAR(c)) && (c != '<')) &&
3948 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003949 /*
3950 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3951 * special option is given
3952 */
3953 if ((len > XML_MAX_TEXT_LENGTH) &&
3954 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3955 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003956 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003957 goto mem_error;
3958 }
Owen Taylor3473f882001-02-23 17:55:21 +00003959 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003960 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003961 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003962 if (NXT(1) == '#') {
3963 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003964
Owen Taylor3473f882001-02-23 17:55:21 +00003965 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003966 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003967 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003968 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003969 }
3970 buf[len++] = '&';
3971 } else {
3972 /*
3973 * The reparsing will be done in xmlStringGetNodeList()
3974 * called by the attribute() function in SAX.c
3975 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003976 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003977 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003978 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003979 buf[len++] = '&';
3980 buf[len++] = '#';
3981 buf[len++] = '3';
3982 buf[len++] = '8';
3983 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003984 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003985 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003986 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003987 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003988 }
Owen Taylor3473f882001-02-23 17:55:21 +00003989 len += xmlCopyChar(0, &buf[len], val);
3990 }
3991 } else {
3992 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003993 ctxt->nbentities++;
3994 if (ent != NULL)
3995 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003996 if ((ent != NULL) &&
3997 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003998 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003999 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004000 }
4001 if ((ctxt->replaceEntities == 0) &&
4002 (ent->content[0] == '&')) {
4003 buf[len++] = '&';
4004 buf[len++] = '#';
4005 buf[len++] = '3';
4006 buf[len++] = '8';
4007 buf[len++] = ';';
4008 } else {
4009 buf[len++] = ent->content[0];
4010 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004011 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004012 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004013 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4014 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004015 XML_SUBSTITUTE_REF,
4016 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004017 if (rep != NULL) {
4018 current = rep;
4019 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004020 if ((*current == 0xD) || (*current == 0xA) ||
4021 (*current == 0x9)) {
4022 buf[len++] = 0x20;
4023 current++;
4024 } else
4025 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004026 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004027 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004028 }
4029 }
4030 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004031 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 }
4033 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004034 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004035 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004036 }
Owen Taylor3473f882001-02-23 17:55:21 +00004037 if (ent->content != NULL)
4038 buf[len++] = ent->content[0];
4039 }
4040 } else if (ent != NULL) {
4041 int i = xmlStrlen(ent->name);
4042 const xmlChar *cur = ent->name;
4043
4044 /*
4045 * This may look absurd but is needed to detect
4046 * entities problems
4047 */
4048 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004049 (ent->content != NULL) && (ent->checked == 0)) {
4050 unsigned long oldnbent = ctxt->nbentities;
4051
Owen Taylor3473f882001-02-23 17:55:21 +00004052 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004053 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004054
Daniel Veillardcff25462013-03-11 15:57:55 +08004055 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004056 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004057 if (xmlStrchr(rep, '<'))
4058 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004059 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004060 rep = NULL;
4061 }
Owen Taylor3473f882001-02-23 17:55:21 +00004062 }
4063
4064 /*
4065 * Just output the reference
4066 */
4067 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004068 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004069 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004070 }
4071 for (;i > 0;i--)
4072 buf[len++] = *cur++;
4073 buf[len++] = ';';
4074 }
4075 }
4076 } else {
4077 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004078 if ((len != 0) || (!normalize)) {
4079 if ((!normalize) || (!in_space)) {
4080 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004081 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004082 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004083 }
4084 }
4085 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004086 }
4087 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004088 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004089 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004090 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004091 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004092 }
4093 }
4094 NEXTL(l);
4095 }
4096 GROW;
4097 c = CUR_CHAR(l);
4098 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004099 if (ctxt->instate == XML_PARSER_EOF)
4100 goto error;
4101
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004102 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004103 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004104 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004105 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004106 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004107 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004108 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004109 if ((c != 0) && (!IS_CHAR(c))) {
4110 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4111 "invalid character in attribute value\n");
4112 } else {
4113 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4114 "AttValue: ' expected\n");
4115 }
Owen Taylor3473f882001-02-23 17:55:21 +00004116 } else
4117 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004118
4119 /*
4120 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004121 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004122 */
4123 if (len >= INT_MAX) {
4124 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004125 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004126 goto mem_error;
4127 }
4128
4129 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004131
4132mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004133 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004134error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004135 if (buf != NULL)
4136 xmlFree(buf);
4137 if (rep != NULL)
4138 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004139 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004140}
4141
4142/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004143 * xmlParseAttValue:
4144 * @ctxt: an XML parser context
4145 *
4146 * parse a value for an attribute
4147 * Note: the parser won't do substitution of entities here, this
4148 * will be handled later in xmlStringGetNodeList
4149 *
4150 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4151 * "'" ([^<&'] | Reference)* "'"
4152 *
4153 * 3.3.3 Attribute-Value Normalization:
4154 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004155 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004156 * - a character reference is processed by appending the referenced
4157 * character to the attribute value
4158 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004159 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004160 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4161 * appending #x20 to the normalized value, except that only a single
4162 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004163 * parsed entity or the literal entity value of an internal parsed entity
4164 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004165 * If the declared value is not CDATA, then the XML processor must further
4166 * process the normalized attribute value by discarding any leading and
4167 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004168 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004169 * All attributes for which no declaration has been read should be treated
4170 * by a non-validating parser as if declared CDATA.
4171 *
4172 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4173 */
4174
4175
4176xmlChar *
4177xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004178 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004179 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004180}
4181
4182/**
Owen Taylor3473f882001-02-23 17:55:21 +00004183 * xmlParseSystemLiteral:
4184 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004185 *
Owen Taylor3473f882001-02-23 17:55:21 +00004186 * parse an XML Literal
4187 *
4188 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4189 *
4190 * Returns the SystemLiteral parsed or NULL
4191 */
4192
4193xmlChar *
4194xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4195 xmlChar *buf = NULL;
4196 int len = 0;
4197 int size = XML_PARSER_BUFFER_SIZE;
4198 int cur, l;
4199 xmlChar stop;
4200 int state = ctxt->instate;
4201 int count = 0;
4202
4203 SHRINK;
4204 if (RAW == '"') {
4205 NEXT;
4206 stop = '"';
4207 } else if (RAW == '\'') {
4208 NEXT;
4209 stop = '\'';
4210 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004211 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004212 return(NULL);
4213 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004214
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004215 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004216 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004217 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004218 return(NULL);
4219 }
4220 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4221 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004222 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004223 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004224 xmlChar *tmp;
4225
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004226 if ((size > XML_MAX_NAME_LENGTH) &&
4227 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4228 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4229 xmlFree(buf);
4230 ctxt->instate = (xmlParserInputState) state;
4231 return(NULL);
4232 }
Owen Taylor3473f882001-02-23 17:55:21 +00004233 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004234 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4235 if (tmp == NULL) {
4236 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004237 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004238 ctxt->instate = (xmlParserInputState) state;
4239 return(NULL);
4240 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004241 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004242 }
4243 count++;
4244 if (count > 50) {
4245 GROW;
4246 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004247 if (ctxt->instate == XML_PARSER_EOF) {
4248 xmlFree(buf);
4249 return(NULL);
4250 }
Owen Taylor3473f882001-02-23 17:55:21 +00004251 }
4252 COPY_BUF(l,buf,len,cur);
4253 NEXTL(l);
4254 cur = CUR_CHAR(l);
4255 if (cur == 0) {
4256 GROW;
4257 SHRINK;
4258 cur = CUR_CHAR(l);
4259 }
4260 }
4261 buf[len] = 0;
4262 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004263 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004264 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004265 } else {
4266 NEXT;
4267 }
4268 return(buf);
4269}
4270
4271/**
4272 * xmlParsePubidLiteral:
4273 * @ctxt: an XML parser context
4274 *
4275 * parse an XML public literal
4276 *
4277 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4278 *
4279 * Returns the PubidLiteral parsed or NULL.
4280 */
4281
4282xmlChar *
4283xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4284 xmlChar *buf = NULL;
4285 int len = 0;
4286 int size = XML_PARSER_BUFFER_SIZE;
4287 xmlChar cur;
4288 xmlChar stop;
4289 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004290 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004291
4292 SHRINK;
4293 if (RAW == '"') {
4294 NEXT;
4295 stop = '"';
4296 } else if (RAW == '\'') {
4297 NEXT;
4298 stop = '\'';
4299 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004300 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004301 return(NULL);
4302 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004303 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004304 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004305 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004306 return(NULL);
4307 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004308 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004309 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004310 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004311 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004312 xmlChar *tmp;
4313
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004314 if ((size > XML_MAX_NAME_LENGTH) &&
4315 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4316 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4317 xmlFree(buf);
4318 return(NULL);
4319 }
Owen Taylor3473f882001-02-23 17:55:21 +00004320 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004321 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4322 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004323 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004324 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 return(NULL);
4326 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004327 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004328 }
4329 buf[len++] = cur;
4330 count++;
4331 if (count > 50) {
4332 GROW;
4333 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004334 if (ctxt->instate == XML_PARSER_EOF) {
4335 xmlFree(buf);
4336 return(NULL);
4337 }
Owen Taylor3473f882001-02-23 17:55:21 +00004338 }
4339 NEXT;
4340 cur = CUR;
4341 if (cur == 0) {
4342 GROW;
4343 SHRINK;
4344 cur = CUR;
4345 }
4346 }
4347 buf[len] = 0;
4348 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004349 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004350 } else {
4351 NEXT;
4352 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004353 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004354 return(buf);
4355}
4356
Daniel Veillard8ed10722009-08-20 19:17:36 +02004357static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004358
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing. An entry holds the byte value itself, or 0x00 for:
 *   - control characters other than 0x09 (so 0x0A and 0x0D are zero too),
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D),
 *   - every byte from 0x80 up (non-ascii).
 * One row per group of 16 byte values.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
               0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
               0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
               0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
               0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
               0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
               0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
               0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to implicit zero initialization */
};
4396
Owen Taylor3473f882001-02-23 17:55:21 +00004397/**
4398 * xmlParseCharData:
4399 * @ctxt: an XML parser context
4400 * @cdata: int indicating whether we are within a CDATA section
4401 *
4402 * parse a CharData section.
4403 * if we are within a CDATA section ']]>' marks an end of section.
4404 *
4405 * The right angle bracket (>) may be represented using the string "&gt;",
4406 * and must, for compatibility, be escaped using "&gt;" or a character
4407 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004408 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004409 *
4410 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4411 */
4412
4413void
4414xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004415 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004416 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004417 int line = ctxt->input->line;
4418 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004419 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004420
4421 SHRINK;
4422 GROW;
4423 /*
4424 * Accelerated common case where input don't need to be
4425 * modified before passing it to the handler.
4426 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004427 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004428 in = ctxt->input->cur;
4429 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004430get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004431 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004432 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004433 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004434 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004435 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004436 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004437 goto get_more_space;
4438 }
4439 if (*in == '<') {
4440 nbchar = in - ctxt->input->cur;
4441 if (nbchar > 0) {
4442 const xmlChar *tmp = ctxt->input->cur;
4443 ctxt->input->cur = in;
4444
Daniel Veillard34099b42004-11-04 17:34:35 +00004445 if ((ctxt->sax != NULL) &&
4446 (ctxt->sax->ignorableWhitespace !=
4447 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004448 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004449 if (ctxt->sax->ignorableWhitespace != NULL)
4450 ctxt->sax->ignorableWhitespace(ctxt->userData,
4451 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004452 } else {
4453 if (ctxt->sax->characters != NULL)
4454 ctxt->sax->characters(ctxt->userData,
4455 tmp, nbchar);
4456 if (*ctxt->space == -1)
4457 *ctxt->space = -2;
4458 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004459 } else if ((ctxt->sax != NULL) &&
4460 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004461 ctxt->sax->characters(ctxt->userData,
4462 tmp, nbchar);
4463 }
4464 }
4465 return;
4466 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004467
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004468get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004469 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004470 while (test_char_data[*in]) {
4471 in++;
4472 ccol++;
4473 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004474 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004475 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004476 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004477 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004478 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004479 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004480 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004481 }
4482 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004483 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004484 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004485 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004486 return;
4487 }
4488 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004489 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004490 goto get_more;
4491 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004492 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004493 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004494 if ((ctxt->sax != NULL) &&
4495 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004496 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004497 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004498 const xmlChar *tmp = ctxt->input->cur;
4499 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004500
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004501 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004502 if (ctxt->sax->ignorableWhitespace != NULL)
4503 ctxt->sax->ignorableWhitespace(ctxt->userData,
4504 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004505 } else {
4506 if (ctxt->sax->characters != NULL)
4507 ctxt->sax->characters(ctxt->userData,
4508 tmp, nbchar);
4509 if (*ctxt->space == -1)
4510 *ctxt->space = -2;
4511 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004512 line = ctxt->input->line;
4513 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004514 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004515 if (ctxt->sax->characters != NULL)
4516 ctxt->sax->characters(ctxt->userData,
4517 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004518 line = ctxt->input->line;
4519 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004520 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004521 /* something really bad happened in the SAX callback */
4522 if (ctxt->instate != XML_PARSER_CONTENT)
4523 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004524 }
4525 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004526 if (*in == 0xD) {
4527 in++;
4528 if (*in == 0xA) {
4529 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004530 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004531 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004532 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004533 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004534 in--;
4535 }
4536 if (*in == '<') {
4537 return;
4538 }
4539 if (*in == '&') {
4540 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004541 }
4542 SHRINK;
4543 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004544 if (ctxt->instate == XML_PARSER_EOF)
4545 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004546 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004547 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004548 nbchar = 0;
4549 }
Daniel Veillard50582112001-03-26 22:52:16 +00004550 ctxt->input->line = line;
4551 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004552 xmlParseCharDataComplex(ctxt, cdata);
4553}
4554
Daniel Veillard01c13b52002-12-10 15:19:08 +00004555/**
4556 * xmlParseCharDataComplex:
4557 * @ctxt: an XML parser context
4558 * @cdata: int indicating whether we are within a CDATA section
4559 *
4560 * parse a CharData section.this is the fallback function
4561 * of xmlParseCharData() when the parsing requires handling
4562 * of non-ASCII characters.
4563 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004564static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004565xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004566 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4567 int nbchar = 0;
4568 int cur, l;
4569 int count = 0;
4570
4571 SHRINK;
4572 GROW;
4573 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004574 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004575 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004576 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004577 if ((cur == ']') && (NXT(1) == ']') &&
4578 (NXT(2) == '>')) {
4579 if (cdata) break;
4580 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004581 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004582 }
4583 }
4584 COPY_BUF(l,buf,nbchar,cur);
4585 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004586 buf[nbchar] = 0;
4587
Owen Taylor3473f882001-02-23 17:55:21 +00004588 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004589 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004590 */
4591 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004592 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004593 if (ctxt->sax->ignorableWhitespace != NULL)
4594 ctxt->sax->ignorableWhitespace(ctxt->userData,
4595 buf, nbchar);
4596 } else {
4597 if (ctxt->sax->characters != NULL)
4598 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004599 if ((ctxt->sax->characters !=
4600 ctxt->sax->ignorableWhitespace) &&
4601 (*ctxt->space == -1))
4602 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004603 }
4604 }
4605 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004606 /* something really bad happened in the SAX callback */
4607 if (ctxt->instate != XML_PARSER_CONTENT)
4608 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004609 }
4610 count++;
4611 if (count > 50) {
4612 GROW;
4613 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004614 if (ctxt->instate == XML_PARSER_EOF)
4615 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 }
4617 NEXTL(l);
4618 cur = CUR_CHAR(l);
4619 }
4620 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004621 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004622 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004623 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004624 */
4625 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004626 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004627 if (ctxt->sax->ignorableWhitespace != NULL)
4628 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4629 } else {
4630 if (ctxt->sax->characters != NULL)
4631 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004632 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4633 (*ctxt->space == -1))
4634 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004635 }
4636 }
4637 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004638 if ((cur != 0) && (!IS_CHAR(cur))) {
4639 /* Generate the error and skip the offending character */
4640 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4641 "PCDATA invalid Char value %d\n",
4642 cur);
4643 NEXTL(l);
4644 }
Owen Taylor3473f882001-02-23 17:55:21 +00004645}
4646
4647/**
4648 * xmlParseExternalID:
4649 * @ctxt: an XML parser context
4650 * @publicID: a xmlChar** receiving PubidLiteral
4651 * @strict: indicate whether we should restrict parsing to only
4652 * production [75], see NOTE below
4653 *
4654 * Parse an External ID or a Public ID
4655 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004656 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004657 * 'PUBLIC' S PubidLiteral S SystemLiteral
4658 *
4659 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4660 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4661 *
4662 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4663 *
4664 * Returns the function returns SystemLiteral and in the second
4665 * case publicID receives PubidLiteral, is strict is off
4666 * it is possible to return NULL and have publicID set.
4667 */
4668
4669xmlChar *
4670xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4671 xmlChar *URI = NULL;
4672
4673 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004674
4675 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004676 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004677 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004678 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004679 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4680 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004681 }
4682 SKIP_BLANKS;
4683 URI = xmlParseSystemLiteral(ctxt);
4684 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004685 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004686 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004687 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004688 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004689 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004690 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004691 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004692 }
4693 SKIP_BLANKS;
4694 *publicID = xmlParsePubidLiteral(ctxt);
4695 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004696 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004697 }
4698 if (strict) {
4699 /*
4700 * We don't handle [83] so "S SystemLiteral" is required.
4701 */
William M. Brack76e95df2003-10-18 16:20:14 +00004702 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004703 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004704 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004705 }
4706 } else {
4707 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004708 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004709 * "S SystemLiteral" is not detected. From a purely parsing
4710 * point of view that's a nice mess.
4711 */
4712 const xmlChar *ptr;
4713 GROW;
4714
4715 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004716 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004717
William M. Brack76e95df2003-10-18 16:20:14 +00004718 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004719 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4720 }
4721 SKIP_BLANKS;
4722 URI = xmlParseSystemLiteral(ctxt);
4723 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004724 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004725 }
4726 }
4727 return(URI);
4728}
4729
4730/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004731 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004732 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004733 * @buf: the already parsed part of the buffer
4734 * @len: number of bytes filles in the buffer
4735 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004736 *
4737 * Skip an XML (SGML) comment <!-- .... -->
4738 * The spec says that "For compatibility, the string "--" (double-hyphen)
4739 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004740 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004741 *
4742 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4743 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004744static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004745xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4746 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004747 int q, ql;
4748 int r, rl;
4749 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004750 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004751 int inputid;
4752
4753 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004754
Owen Taylor3473f882001-02-23 17:55:21 +00004755 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004756 len = 0;
4757 size = XML_PARSER_BUFFER_SIZE;
4758 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4759 if (buf == NULL) {
4760 xmlErrMemory(ctxt, NULL);
4761 return;
4762 }
Owen Taylor3473f882001-02-23 17:55:21 +00004763 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004764 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004765 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004766 if (q == 0)
4767 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004768 if (!IS_CHAR(q)) {
4769 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4770 "xmlParseComment: invalid xmlChar value %d\n",
4771 q);
4772 xmlFree (buf);
4773 return;
4774 }
Owen Taylor3473f882001-02-23 17:55:21 +00004775 NEXTL(ql);
4776 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004777 if (r == 0)
4778 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004779 if (!IS_CHAR(r)) {
4780 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4781 "xmlParseComment: invalid xmlChar value %d\n",
4782 q);
4783 xmlFree (buf);
4784 return;
4785 }
Owen Taylor3473f882001-02-23 17:55:21 +00004786 NEXTL(rl);
4787 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004788 if (cur == 0)
4789 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004790 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004791 ((cur != '>') ||
4792 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004793 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004794 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004795 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004796 if ((len > XML_MAX_TEXT_LENGTH) &&
4797 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4798 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4799 "Comment too big found", NULL);
4800 xmlFree (buf);
4801 return;
4802 }
Owen Taylor3473f882001-02-23 17:55:21 +00004803 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004804 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004805 size_t new_size;
4806
4807 new_size = size * 2;
4808 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004809 if (new_buf == NULL) {
4810 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004811 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004812 return;
4813 }
William M. Bracka3215c72004-07-31 16:24:01 +00004814 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004815 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004816 }
4817 COPY_BUF(ql,buf,len,q);
4818 q = r;
4819 ql = rl;
4820 r = cur;
4821 rl = l;
4822
4823 count++;
4824 if (count > 50) {
4825 GROW;
4826 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004827 if (ctxt->instate == XML_PARSER_EOF) {
4828 xmlFree(buf);
4829 return;
4830 }
Owen Taylor3473f882001-02-23 17:55:21 +00004831 }
4832 NEXTL(l);
4833 cur = CUR_CHAR(l);
4834 if (cur == 0) {
4835 SHRINK;
4836 GROW;
4837 cur = CUR_CHAR(l);
4838 }
4839 }
4840 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004841 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004842 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004843 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004844 } else if (!IS_CHAR(cur)) {
4845 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4846 "xmlParseComment: invalid xmlChar value %d\n",
4847 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004848 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004849 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004850 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4851 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004852 }
4853 NEXT;
4854 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4855 (!ctxt->disableSAX))
4856 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
Daniel Veillardda629342007-08-01 07:49:06 +00004858 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004859 return;
4860not_terminated:
4861 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4862 "Comment not terminated\n", NULL);
4863 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004864 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004865}
Daniel Veillardda629342007-08-01 07:49:06 +00004866
Daniel Veillard4c778d82005-01-23 17:37:44 +00004867/**
4868 * xmlParseComment:
4869 * @ctxt: an XML parser context
4870 *
4871 * Skip an XML (SGML) comment <!-- .... -->
4872 * The spec says that "For compatibility, the string "--" (double-hyphen)
4873 * must not occur within comments. "
4874 *
4875 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4876 */
4877void
4878xmlParseComment(xmlParserCtxtPtr ctxt) {
4879 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004880 size_t size = XML_PARSER_BUFFER_SIZE;
4881 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004882 xmlParserInputState state;
4883 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004884 size_t nbchar = 0;
4885 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004886 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004887
4888 /*
4889 * Check that there is a comment right here.
4890 */
4891 if ((RAW != '<') || (NXT(1) != '!') ||
4892 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004893 state = ctxt->instate;
4894 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004895 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004896 SKIP(4);
4897 SHRINK;
4898 GROW;
4899
4900 /*
4901 * Accelerated common case where input don't need to be
4902 * modified before passing it to the handler.
4903 */
4904 in = ctxt->input->cur;
4905 do {
4906 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004907 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004908 ctxt->input->line++; ctxt->input->col = 1;
4909 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004910 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004911 }
4912get_more:
4913 ccol = ctxt->input->col;
4914 while (((*in > '-') && (*in <= 0x7F)) ||
4915 ((*in >= 0x20) && (*in < '-')) ||
4916 (*in == 0x09)) {
4917 in++;
4918 ccol++;
4919 }
4920 ctxt->input->col = ccol;
4921 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004922 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004923 ctxt->input->line++; ctxt->input->col = 1;
4924 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004925 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004926 goto get_more;
4927 }
4928 nbchar = in - ctxt->input->cur;
4929 /*
4930 * save current set of data
4931 */
4932 if (nbchar > 0) {
4933 if ((ctxt->sax != NULL) &&
4934 (ctxt->sax->comment != NULL)) {
4935 if (buf == NULL) {
4936 if ((*in == '-') && (in[1] == '-'))
4937 size = nbchar + 1;
4938 else
4939 size = XML_PARSER_BUFFER_SIZE + nbchar;
4940 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4941 if (buf == NULL) {
4942 xmlErrMemory(ctxt, NULL);
4943 ctxt->instate = state;
4944 return;
4945 }
4946 len = 0;
4947 } else if (len + nbchar + 1 >= size) {
4948 xmlChar *new_buf;
4949 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4950 new_buf = (xmlChar *) xmlRealloc(buf,
4951 size * sizeof(xmlChar));
4952 if (new_buf == NULL) {
4953 xmlFree (buf);
4954 xmlErrMemory(ctxt, NULL);
4955 ctxt->instate = state;
4956 return;
4957 }
4958 buf = new_buf;
4959 }
4960 memcpy(&buf[len], ctxt->input->cur, nbchar);
4961 len += nbchar;
4962 buf[len] = 0;
4963 }
4964 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004965 if ((len > XML_MAX_TEXT_LENGTH) &&
4966 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4967 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4968 "Comment too big found", NULL);
4969 xmlFree (buf);
4970 return;
4971 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004972 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004973 if (*in == 0xA) {
4974 in++;
4975 ctxt->input->line++; ctxt->input->col = 1;
4976 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004977 if (*in == 0xD) {
4978 in++;
4979 if (*in == 0xA) {
4980 ctxt->input->cur = in;
4981 in++;
4982 ctxt->input->line++; ctxt->input->col = 1;
4983 continue; /* while */
4984 }
4985 in--;
4986 }
4987 SHRINK;
4988 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004989 if (ctxt->instate == XML_PARSER_EOF) {
4990 xmlFree(buf);
4991 return;
4992 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004993 in = ctxt->input->cur;
4994 if (*in == '-') {
4995 if (in[1] == '-') {
4996 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004997 if (ctxt->input->id != inputid) {
4998 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4999 "comment doesn't start and stop in the same entity\n");
5000 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005001 SKIP(3);
5002 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5003 (!ctxt->disableSAX)) {
5004 if (buf != NULL)
5005 ctxt->sax->comment(ctxt->userData, buf);
5006 else
5007 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5008 }
5009 if (buf != NULL)
5010 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005011 if (ctxt->instate != XML_PARSER_EOF)
5012 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005013 return;
5014 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005015 if (buf != NULL) {
5016 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5017 "Double hyphen within comment: "
5018 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005019 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005020 } else
5021 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5022 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005023 in++;
5024 ctxt->input->col++;
5025 }
5026 in++;
5027 ctxt->input->col++;
5028 goto get_more;
5029 }
5030 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5031 xmlParseCommentComplex(ctxt, buf, len, size);
5032 ctxt->instate = state;
5033 return;
5034}
5035
Owen Taylor3473f882001-02-23 17:55:21 +00005036
5037/**
5038 * xmlParsePITarget:
5039 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005040 *
Owen Taylor3473f882001-02-23 17:55:21 +00005041 * parse the name of a PI
5042 *
5043 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5044 *
5045 * Returns the PITarget name or NULL
5046 */
5047
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005048const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005049xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005050 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005051
5052 name = xmlParseName(ctxt);
5053 if ((name != NULL) &&
5054 ((name[0] == 'x') || (name[0] == 'X')) &&
5055 ((name[1] == 'm') || (name[1] == 'M')) &&
5056 ((name[2] == 'l') || (name[2] == 'L'))) {
5057 int i;
5058 if ((name[0] == 'x') && (name[1] == 'm') &&
5059 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005060 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005061 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005062 return(name);
5063 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005064 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005065 return(name);
5066 }
5067 for (i = 0;;i++) {
5068 if (xmlW3CPIs[i] == NULL) break;
5069 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5070 return(name);
5071 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005072 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5073 "xmlParsePITarget: invalid name prefix 'xml'\n",
5074 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005075 }
Daniel Veillard37334572008-07-31 08:20:02 +00005076 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005077 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005078 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5079 }
Owen Taylor3473f882001-02-23 17:55:21 +00005080 return(name);
5081}
5082
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must have the form  catalog = "URL"  (single or double
 * quotes, optional blanks around each token, nothing after the closing
 * quote).  Any other syntax produces an XML_WAR_CATALOG_PI warning,
 * except a missing '=' after the keyword, which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p, *start;
    xmlChar quote;

    p = catalog;

    /* the "catalog" keyword */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    /* the '=' sign; its absence is not reported */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=') {
        return;
    }
    p++;

    /* the quoted URL */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5144
Owen Taylor3473f882001-02-23 17:55:21 +00005145/**
5146 * xmlParsePI:
5147 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005148 *
Owen Taylor3473f882001-02-23 17:55:21 +00005149 * parse an XML Processing Instruction.
5150 *
5151 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5152 *
5153 * The processing is transfered to SAX once parsed.
5154 */
5155
5156void
5157xmlParsePI(xmlParserCtxtPtr ctxt) {
5158 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005159 size_t len = 0;
5160 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005162 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005163 xmlParserInputState state;
5164 int count = 0;
5165
5166 if ((RAW == '<') && (NXT(1) == '?')) {
5167 xmlParserInputPtr input = ctxt->input;
5168 state = ctxt->instate;
5169 ctxt->instate = XML_PARSER_PI;
5170 /*
5171 * this is a Processing Instruction.
5172 */
5173 SKIP(2);
5174 SHRINK;
5175
5176 /*
5177 * Parse the target name and check for special support like
5178 * namespace.
5179 */
5180 target = xmlParsePITarget(ctxt);
5181 if (target != NULL) {
5182 if ((RAW == '?') && (NXT(1) == '>')) {
5183 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005184 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5185 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005186 }
5187 SKIP(2);
5188
5189 /*
5190 * SAX: PI detected.
5191 */
5192 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5193 (ctxt->sax->processingInstruction != NULL))
5194 ctxt->sax->processingInstruction(ctxt->userData,
5195 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005196 if (ctxt->instate != XML_PARSER_EOF)
5197 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005198 return;
5199 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005200 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005201 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005202 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005203 ctxt->instate = state;
5204 return;
5205 }
5206 cur = CUR;
5207 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005208 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5209 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 }
5211 SKIP_BLANKS;
5212 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005213 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005214 ((cur != '?') || (NXT(1) != '>'))) {
5215 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005216 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005217 size_t new_size = size * 2;
5218 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005219 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005220 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005221 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005222 ctxt->instate = state;
5223 return;
5224 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005225 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005226 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005227 }
5228 count++;
5229 if (count > 50) {
5230 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005231 if (ctxt->instate == XML_PARSER_EOF) {
5232 xmlFree(buf);
5233 return;
5234 }
Owen Taylor3473f882001-02-23 17:55:21 +00005235 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005236 if ((len > XML_MAX_TEXT_LENGTH) &&
5237 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5238 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5239 "PI %s too big found", target);
5240 xmlFree(buf);
5241 ctxt->instate = state;
5242 return;
5243 }
Owen Taylor3473f882001-02-23 17:55:21 +00005244 }
5245 COPY_BUF(l,buf,len,cur);
5246 NEXTL(l);
5247 cur = CUR_CHAR(l);
5248 if (cur == 0) {
5249 SHRINK;
5250 GROW;
5251 cur = CUR_CHAR(l);
5252 }
5253 }
Daniel Veillard51304812012-07-19 20:34:26 +08005254 if ((len > XML_MAX_TEXT_LENGTH) &&
5255 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5256 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5257 "PI %s too big found", target);
5258 xmlFree(buf);
5259 ctxt->instate = state;
5260 return;
5261 }
Owen Taylor3473f882001-02-23 17:55:21 +00005262 buf[len] = 0;
5263 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005264 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5265 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005266 } else {
5267 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005268 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5269 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005270 }
5271 SKIP(2);
5272
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005273#ifdef LIBXML_CATALOG_ENABLED
5274 if (((state == XML_PARSER_MISC) ||
5275 (state == XML_PARSER_START)) &&
5276 (xmlStrEqual(target, XML_CATALOG_PI))) {
5277 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5278 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5279 (allow == XML_CATA_ALLOW_ALL))
5280 xmlParseCatalogPI(ctxt, buf);
5281 }
5282#endif
5283
5284
Owen Taylor3473f882001-02-23 17:55:21 +00005285 /*
5286 * SAX: PI detected.
5287 */
5288 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5289 (ctxt->sax->processingInstruction != NULL))
5290 ctxt->sax->processingInstruction(ctxt->userData,
5291 target, buf);
5292 }
5293 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005294 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005295 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005296 }
Chris Evans77404b82011-12-14 16:18:25 +08005297 if (ctxt->instate != XML_PARSER_EOF)
5298 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005299 }
5300}
5301
5302/**
5303 * xmlParseNotationDecl:
5304 * @ctxt: an XML parser context
5305 *
5306 * parse a notation declaration
5307 *
5308 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5309 *
5310 * Hence there is actually 3 choices:
5311 * 'PUBLIC' S PubidLiteral
5312 * 'PUBLIC' S PubidLiteral S SystemLiteral
5313 * and 'SYSTEM' S SystemLiteral
5314 *
5315 * See the NOTE on xmlParseExternalID().
5316 */
5317
5318void
5319xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005320 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005321 xmlChar *Pubid;
5322 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005323
Daniel Veillarda07050d2003-10-19 14:46:32 +00005324 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005325 xmlParserInputPtr input = ctxt->input;
5326 SHRINK;
5327 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005328 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5330 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005331 return;
5332 }
5333 SKIP_BLANKS;
5334
Daniel Veillard76d66f42001-05-16 21:05:17 +00005335 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005336 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005337 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005338 return;
5339 }
William M. Brack76e95df2003-10-18 16:20:14 +00005340 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005341 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005342 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005343 return;
5344 }
Daniel Veillard37334572008-07-31 08:20:02 +00005345 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005346 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005347 "colon are forbidden from notation names '%s'\n",
5348 name, NULL, NULL);
5349 }
Owen Taylor3473f882001-02-23 17:55:21 +00005350 SKIP_BLANKS;
5351
5352 /*
5353 * Parse the IDs.
5354 */
5355 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5356 SKIP_BLANKS;
5357
5358 if (RAW == '>') {
5359 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005360 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5361 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005362 }
5363 NEXT;
5364 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5365 (ctxt->sax->notationDecl != NULL))
5366 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5367 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005368 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005369 }
Owen Taylor3473f882001-02-23 17:55:21 +00005370 if (Systemid != NULL) xmlFree(Systemid);
5371 if (Pubid != NULL) xmlFree(Pubid);
5372 }
5373}
5374
5375/**
5376 * xmlParseEntityDecl:
5377 * @ctxt: an XML parser context
5378 *
5379 * parse <!ENTITY declarations
5380 *
5381 * [70] EntityDecl ::= GEDecl | PEDecl
5382 *
5383 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5384 *
5385 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5386 *
5387 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5388 *
5389 * [74] PEDef ::= EntityValue | ExternalID
5390 *
5391 * [76] NDataDecl ::= S 'NDATA' S Name
5392 *
5393 * [ VC: Notation Declared ]
5394 * The Name must match the declared name of a notation.
5395 */
5396
5397void
5398xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005399 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005400 xmlChar *value = NULL;
5401 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005402 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005403 int isParameter = 0;
5404 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005405 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005406
Daniel Veillard4c778d82005-01-23 17:37:44 +00005407 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005408 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005409 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005410 SHRINK;
5411 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005412 skipped = SKIP_BLANKS;
5413 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005414 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5415 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005416 }
Owen Taylor3473f882001-02-23 17:55:21 +00005417
5418 if (RAW == '%') {
5419 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005420 skipped = SKIP_BLANKS;
5421 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005422 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005424 }
Owen Taylor3473f882001-02-23 17:55:21 +00005425 isParameter = 1;
5426 }
5427
Daniel Veillard76d66f42001-05-16 21:05:17 +00005428 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005429 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005430 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5431 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005432 return;
5433 }
Daniel Veillard37334572008-07-31 08:20:02 +00005434 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005435 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005436 "colon are forbidden from entities names '%s'\n",
5437 name, NULL, NULL);
5438 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005439 skipped = SKIP_BLANKS;
5440 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005441 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5442 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005443 }
Owen Taylor3473f882001-02-23 17:55:21 +00005444
Daniel Veillardf5582f12002-06-11 10:08:16 +00005445 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005446 /*
5447 * handle the various case of definitions...
5448 */
5449 if (isParameter) {
5450 if ((RAW == '"') || (RAW == '\'')) {
5451 value = xmlParseEntityValue(ctxt, &orig);
5452 if (value) {
5453 if ((ctxt->sax != NULL) &&
5454 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5455 ctxt->sax->entityDecl(ctxt->userData, name,
5456 XML_INTERNAL_PARAMETER_ENTITY,
5457 NULL, NULL, value);
5458 }
5459 } else {
5460 URI = xmlParseExternalID(ctxt, &literal, 1);
5461 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005462 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005463 }
5464 if (URI) {
5465 xmlURIPtr uri;
5466
5467 uri = xmlParseURI((const char *) URI);
5468 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005469 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5470 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005471 /*
5472 * This really ought to be a well formedness error
5473 * but the XML Core WG decided otherwise c.f. issue
5474 * E26 of the XML erratas.
5475 */
Owen Taylor3473f882001-02-23 17:55:21 +00005476 } else {
5477 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005478 /*
5479 * Okay this is foolish to block those but not
5480 * invalid URIs.
5481 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005482 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005483 } else {
5484 if ((ctxt->sax != NULL) &&
5485 (!ctxt->disableSAX) &&
5486 (ctxt->sax->entityDecl != NULL))
5487 ctxt->sax->entityDecl(ctxt->userData, name,
5488 XML_EXTERNAL_PARAMETER_ENTITY,
5489 literal, URI, NULL);
5490 }
5491 xmlFreeURI(uri);
5492 }
5493 }
5494 }
5495 } else {
5496 if ((RAW == '"') || (RAW == '\'')) {
5497 value = xmlParseEntityValue(ctxt, &orig);
5498 if ((ctxt->sax != NULL) &&
5499 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5500 ctxt->sax->entityDecl(ctxt->userData, name,
5501 XML_INTERNAL_GENERAL_ENTITY,
5502 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005503 /*
5504 * For expat compatibility in SAX mode.
5505 */
5506 if ((ctxt->myDoc == NULL) ||
5507 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5508 if (ctxt->myDoc == NULL) {
5509 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005510 if (ctxt->myDoc == NULL) {
5511 xmlErrMemory(ctxt, "New Doc failed");
5512 return;
5513 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005514 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005515 }
5516 if (ctxt->myDoc->intSubset == NULL)
5517 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5518 BAD_CAST "fake", NULL, NULL);
5519
Daniel Veillard1af9a412003-08-20 22:54:39 +00005520 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5521 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005522 }
Owen Taylor3473f882001-02-23 17:55:21 +00005523 } else {
5524 URI = xmlParseExternalID(ctxt, &literal, 1);
5525 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005526 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005527 }
5528 if (URI) {
5529 xmlURIPtr uri;
5530
5531 uri = xmlParseURI((const char *)URI);
5532 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005533 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5534 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005535 /*
5536 * This really ought to be a well formedness error
5537 * but the XML Core WG decided otherwise c.f. issue
5538 * E26 of the XML erratas.
5539 */
Owen Taylor3473f882001-02-23 17:55:21 +00005540 } else {
5541 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005542 /*
5543 * Okay this is foolish to block those but not
5544 * invalid URIs.
5545 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005546 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005547 }
5548 xmlFreeURI(uri);
5549 }
5550 }
William M. Brack76e95df2003-10-18 16:20:14 +00005551 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005552 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5553 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005554 }
5555 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005556 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005557 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005558 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005561 }
5562 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005563 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5565 (ctxt->sax->unparsedEntityDecl != NULL))
5566 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5567 literal, URI, ndata);
5568 } else {
5569 if ((ctxt->sax != NULL) &&
5570 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5571 ctxt->sax->entityDecl(ctxt->userData, name,
5572 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5573 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005574 /*
5575 * For expat compatibility in SAX mode.
5576 * assuming the entity repalcement was asked for
5577 */
5578 if ((ctxt->replaceEntities != 0) &&
5579 ((ctxt->myDoc == NULL) ||
5580 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5581 if (ctxt->myDoc == NULL) {
5582 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005583 if (ctxt->myDoc == NULL) {
5584 xmlErrMemory(ctxt, "New Doc failed");
5585 return;
5586 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005587 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005588 }
5589
5590 if (ctxt->myDoc->intSubset == NULL)
5591 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5592 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005593 xmlSAX2EntityDecl(ctxt, name,
5594 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5595 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005596 }
Owen Taylor3473f882001-02-23 17:55:21 +00005597 }
5598 }
5599 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005600 if (ctxt->instate == XML_PARSER_EOF)
5601 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005602 SKIP_BLANKS;
5603 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005604 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005605 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005606 } else {
5607 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5609 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005610 }
5611 NEXT;
5612 }
5613 if (orig != NULL) {
5614 /*
5615 * Ugly mechanism to save the raw entity value.
5616 */
5617 xmlEntityPtr cur = NULL;
5618
5619 if (isParameter) {
5620 if ((ctxt->sax != NULL) &&
5621 (ctxt->sax->getParameterEntity != NULL))
5622 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5623 } else {
5624 if ((ctxt->sax != NULL) &&
5625 (ctxt->sax->getEntity != NULL))
5626 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005627 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005628 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005629 }
Owen Taylor3473f882001-02-23 17:55:21 +00005630 }
5631 if (cur != NULL) {
5632 if (cur->orig != NULL)
5633 xmlFree(orig);
5634 else
5635 cur->orig = orig;
5636 } else
5637 xmlFree(orig);
5638 }
Owen Taylor3473f882001-02-23 17:55:21 +00005639 if (value != NULL) xmlFree(value);
5640 if (URI != NULL) xmlFree(URI);
5641 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005642 }
5643}
5644
5645/**
5646 * xmlParseDefaultDecl:
5647 * @ctxt: an XML parser context
5648 * @value: Receive a possible fixed default value for the attribute
5649 *
5650 * Parse an attribute default declaration
5651 *
5652 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5653 *
5654 * [ VC: Required Attribute ]
5655 * if the default declaration is the keyword #REQUIRED, then the
5656 * attribute must be specified for all elements of the type in the
5657 * attribute-list declaration.
5658 *
5659 * [ VC: Attribute Default Legal ]
5660 * The declared default value must meet the lexical constraints of
5661 * the declared attribute type c.f. xmlValidateAttributeDecl()
5662 *
5663 * [ VC: Fixed Attribute Default ]
5664 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005665 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005666 *
5667 * [ WFC: No < in Attribute Values ]
5668 * handled in xmlParseAttValue()
5669 *
5670 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005671 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005672 */
5673
5674int
5675xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5676 int val;
5677 xmlChar *ret;
5678
5679 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005680 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005681 SKIP(9);
5682 return(XML_ATTRIBUTE_REQUIRED);
5683 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005684 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005685 SKIP(8);
5686 return(XML_ATTRIBUTE_IMPLIED);
5687 }
5688 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005689 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005690 SKIP(6);
5691 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005692 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005693 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5694 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005695 }
5696 SKIP_BLANKS;
5697 }
5698 ret = xmlParseAttValue(ctxt);
5699 ctxt->instate = XML_PARSER_DTD;
5700 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005701 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005702 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005703 } else
5704 *value = ret;
5705 return(val);
5706}
5707
5708/**
5709 * xmlParseNotationType:
5710 * @ctxt: an XML parser context
5711 *
5712 * parse an Notation attribute type.
5713 *
5714 * Note: the leading 'NOTATION' S part has already being parsed...
5715 *
5716 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5717 *
5718 * [ VC: Notation Attributes ]
5719 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005720 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005721 *
5722 * Returns: the notation attribute tree built while parsing
5723 */
5724
5725xmlEnumerationPtr
5726xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005727 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005728 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005729
5730 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005731 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005732 return(NULL);
5733 }
5734 SHRINK;
5735 do {
5736 NEXT;
5737 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005738 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005739 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005740 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5741 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005742 xmlFreeEnumeration(ret);
5743 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005745 tmp = ret;
5746 while (tmp != NULL) {
5747 if (xmlStrEqual(name, tmp->name)) {
5748 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5749 "standalone: attribute notation value token %s duplicated\n",
5750 name, NULL);
5751 if (!xmlDictOwns(ctxt->dict, name))
5752 xmlFree((xmlChar *) name);
5753 break;
5754 }
5755 tmp = tmp->next;
5756 }
5757 if (tmp == NULL) {
5758 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005759 if (cur == NULL) {
5760 xmlFreeEnumeration(ret);
5761 return(NULL);
5762 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005763 if (last == NULL) ret = last = cur;
5764 else {
5765 last->next = cur;
5766 last = cur;
5767 }
Owen Taylor3473f882001-02-23 17:55:21 +00005768 }
5769 SKIP_BLANKS;
5770 } while (RAW == '|');
5771 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005772 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005773 xmlFreeEnumeration(ret);
5774 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776 NEXT;
5777 return(ret);
5778}
5779
5780/**
5781 * xmlParseEnumerationType:
5782 * @ctxt: an XML parser context
5783 *
5784 * parse an Enumeration attribute type.
5785 *
5786 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5787 *
5788 * [ VC: Enumeration ]
5789 * Values of this type must match one of the Nmtoken tokens in
5790 * the declaration
5791 *
5792 * Returns: the enumeration attribute tree built while parsing
5793 */
5794
5795xmlEnumerationPtr
5796xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5797 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005798 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005799
5800 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005801 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005802 return(NULL);
5803 }
5804 SHRINK;
5805 do {
5806 NEXT;
5807 SKIP_BLANKS;
5808 name = xmlParseNmtoken(ctxt);
5809 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005810 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005811 return(ret);
5812 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005813 tmp = ret;
5814 while (tmp != NULL) {
5815 if (xmlStrEqual(name, tmp->name)) {
5816 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5817 "standalone: attribute enumeration value token %s duplicated\n",
5818 name, NULL);
5819 if (!xmlDictOwns(ctxt->dict, name))
5820 xmlFree(name);
5821 break;
5822 }
5823 tmp = tmp->next;
5824 }
5825 if (tmp == NULL) {
5826 cur = xmlCreateEnumeration(name);
5827 if (!xmlDictOwns(ctxt->dict, name))
5828 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005829 if (cur == NULL) {
5830 xmlFreeEnumeration(ret);
5831 return(NULL);
5832 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005833 if (last == NULL) ret = last = cur;
5834 else {
5835 last->next = cur;
5836 last = cur;
5837 }
Owen Taylor3473f882001-02-23 17:55:21 +00005838 }
5839 SKIP_BLANKS;
5840 } while (RAW == '|');
5841 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005842 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 return(ret);
5844 }
5845 NEXT;
5846 return(ret);
5847}
5848
5849/**
5850 * xmlParseEnumeratedType:
5851 * @ctxt: an XML parser context
5852 * @tree: the enumeration tree built while parsing
5853 *
5854 * parse an Enumerated attribute type.
5855 *
5856 * [57] EnumeratedType ::= NotationType | Enumeration
5857 *
5858 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5859 *
5860 *
5861 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5862 */
5863
5864int
5865xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005866 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005867 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005868 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005869 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5870 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005871 return(0);
5872 }
5873 SKIP_BLANKS;
5874 *tree = xmlParseNotationType(ctxt);
5875 if (*tree == NULL) return(0);
5876 return(XML_ATTRIBUTE_NOTATION);
5877 }
5878 *tree = xmlParseEnumerationType(ctxt);
5879 if (*tree == NULL) return(0);
5880 return(XML_ATTRIBUTE_ENUMERATION);
5881}
5882
5883/**
5884 * xmlParseAttributeType:
5885 * @ctxt: an XML parser context
5886 * @tree: the enumeration tree built while parsing
5887 *
5888 * parse the Attribute list def for an element
5889 *
5890 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5891 *
5892 * [55] StringType ::= 'CDATA'
5893 *
5894 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5895 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5896 *
5897 * Validity constraints for attribute values syntax are checked in
5898 * xmlValidateAttributeValue()
5899 *
5900 * [ VC: ID ]
5901 * Values of type ID must match the Name production. A name must not
5902 * appear more than once in an XML document as a value of this type;
5903 * i.e., ID values must uniquely identify the elements which bear them.
5904 *
5905 * [ VC: One ID per Element Type ]
5906 * No element type may have more than one ID attribute specified.
5907 *
5908 * [ VC: ID Attribute Default ]
5909 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5910 *
5911 * [ VC: IDREF ]
5912 * Values of type IDREF must match the Name production, and values
5913 * of type IDREFS must match Names; each IDREF Name must match the value
5914 * of an ID attribute on some element in the XML document; i.e. IDREF
5915 * values must match the value of some ID attribute.
5916 *
5917 * [ VC: Entity Name ]
5918 * Values of type ENTITY must match the Name production, values
5919 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005920 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005921 *
5922 * [ VC: Name Token ]
5923 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005924 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005925 *
5926 * Returns the attribute type
5927 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005928int
Owen Taylor3473f882001-02-23 17:55:21 +00005929xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5930 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005931 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005932 SKIP(5);
5933 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005934 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005935 SKIP(6);
5936 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005937 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005938 SKIP(5);
5939 return(XML_ATTRIBUTE_IDREF);
5940 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5941 SKIP(2);
5942 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005943 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005944 SKIP(6);
5945 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005946 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005947 SKIP(8);
5948 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005949 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005950 SKIP(8);
5951 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005952 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005953 SKIP(7);
5954 return(XML_ATTRIBUTE_NMTOKEN);
5955 }
5956 return(xmlParseEnumeratedType(ctxt, tree));
5957}
5958
5959/**
5960 * xmlParseAttributeListDecl:
5961 * @ctxt: an XML parser context
5962 *
5963 * : parse the Attribute list def for an element
5964 *
5965 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5966 *
5967 * [53] AttDef ::= S Name S AttType S DefaultDecl
5968 *
5969 */
5970void
5971xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005972 const xmlChar *elemName;
5973 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005974 xmlEnumerationPtr tree;
5975
Daniel Veillarda07050d2003-10-19 14:46:32 +00005976 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005977 xmlParserInputPtr input = ctxt->input;
5978
5979 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005980 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005981 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005982 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005983 }
5984 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005985 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005986 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005987 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5988 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005989 return;
5990 }
5991 SKIP_BLANKS;
5992 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005993 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005994 const xmlChar *check = CUR_PTR;
5995 int type;
5996 int def;
5997 xmlChar *defaultValue = NULL;
5998
5999 GROW;
6000 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006001 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006002 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006003 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6004 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006005 break;
6006 }
6007 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006008 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006009 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006010 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006011 break;
6012 }
6013 SKIP_BLANKS;
6014
6015 type = xmlParseAttributeType(ctxt, &tree);
6016 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006017 break;
6018 }
6019
6020 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006021 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006022 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6023 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006024 if (tree != NULL)
6025 xmlFreeEnumeration(tree);
6026 break;
6027 }
6028 SKIP_BLANKS;
6029
6030 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6031 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006032 if (defaultValue != NULL)
6033 xmlFree(defaultValue);
6034 if (tree != NULL)
6035 xmlFreeEnumeration(tree);
6036 break;
6037 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006038 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6039 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006040
6041 GROW;
6042 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006043 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006044 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006045 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006046 if (defaultValue != NULL)
6047 xmlFree(defaultValue);
6048 if (tree != NULL)
6049 xmlFreeEnumeration(tree);
6050 break;
6051 }
6052 SKIP_BLANKS;
6053 }
6054 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006055 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6056 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006057 if (defaultValue != NULL)
6058 xmlFree(defaultValue);
6059 if (tree != NULL)
6060 xmlFreeEnumeration(tree);
6061 break;
6062 }
6063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6064 (ctxt->sax->attributeDecl != NULL))
6065 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6066 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006067 else if (tree != NULL)
6068 xmlFreeEnumeration(tree);
6069
6070 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006071 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006072 (def != XML_ATTRIBUTE_REQUIRED)) {
6073 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6074 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006075 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006076 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6077 }
Owen Taylor3473f882001-02-23 17:55:21 +00006078 if (defaultValue != NULL)
6079 xmlFree(defaultValue);
6080 GROW;
6081 }
6082 if (RAW == '>') {
6083 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006084 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6085 "Attribute list declaration doesn't start and stop in the same entity\n",
6086 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006087 }
6088 NEXT;
6089 }
Owen Taylor3473f882001-02-23 17:55:21 +00006090 }
6091}
6092
6093/**
6094 * xmlParseElementMixedContentDecl:
6095 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006096 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006097 *
6098 * parse the declaration for a Mixed Element content
6099 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006100 *
Owen Taylor3473f882001-02-23 17:55:21 +00006101 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6102 * '(' S? '#PCDATA' S? ')'
6103 *
6104 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6105 *
6106 * [ VC: No Duplicate Types ]
6107 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006108 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006109 *
6110 * returns: the list of the xmlElementContentPtr describing the element choices
6111 */
6112xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006113xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006114 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006115 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006116
6117 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006118 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006119 SKIP(7);
6120 SKIP_BLANKS;
6121 SHRINK;
6122 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006123 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006124 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6125"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006126 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006127 }
Owen Taylor3473f882001-02-23 17:55:21 +00006128 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006129 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006130 if (ret == NULL)
6131 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006132 if (RAW == '*') {
6133 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6134 NEXT;
6135 }
6136 return(ret);
6137 }
6138 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006139 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006140 if (ret == NULL) return(NULL);
6141 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006142 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006143 NEXT;
6144 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006145 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006146 if (ret == NULL) return(NULL);
6147 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006148 if (cur != NULL)
6149 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006150 cur = ret;
6151 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006152 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006154 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006155 if (n->c1 != NULL)
6156 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006157 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006158 if (n != NULL)
6159 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006160 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006161 }
6162 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006163 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006164 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006165 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006166 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006167 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006168 return(NULL);
6169 }
6170 SKIP_BLANKS;
6171 GROW;
6172 }
6173 if ((RAW == ')') && (NXT(1) == '*')) {
6174 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006175 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006176 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006177 if (cur->c2 != NULL)
6178 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006179 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006180 if (ret != NULL)
6181 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006182 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006183 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6184"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006185 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006186 }
Owen Taylor3473f882001-02-23 17:55:21 +00006187 SKIP(2);
6188 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006189 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006190 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006191 return(NULL);
6192 }
6193
6194 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006195 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006196 }
6197 return(ret);
6198}
6199
6200/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006201 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006202 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006203 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006204 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006205 *
6206 * parse the declaration for a Mixed Element content
6207 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006208 *
Owen Taylor3473f882001-02-23 17:55:21 +00006209 *
6210 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6211 *
6212 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6213 *
6214 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6215 *
6216 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6217 *
6218 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6219 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006220 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006221 * opening or closing parentheses in a choice, seq, or Mixed
6222 * construct is contained in the replacement text for a parameter
6223 * entity, both must be contained in the same replacement text. For
6224 * interoperability, if a parameter-entity reference appears in a
6225 * choice, seq, or Mixed construct, its replacement text should not
6226 * be empty, and neither the first nor last non-blank character of
6227 * the replacement text should be a connector (| or ,).
6228 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006229 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006230 * hierarchy.
6231 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006232static xmlElementContentPtr
6233xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6234 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006235 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006236 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006237 xmlChar type = 0;
6238
Daniel Veillard489f9672009-08-10 16:49:30 +02006239 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6240 (depth > 2048)) {
6241 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6242"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6243 depth);
6244 return(NULL);
6245 }
Owen Taylor3473f882001-02-23 17:55:21 +00006246 SKIP_BLANKS;
6247 GROW;
6248 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006249 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006250
Owen Taylor3473f882001-02-23 17:55:21 +00006251 /* Recurse on first child */
6252 NEXT;
6253 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006254 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6255 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006256 SKIP_BLANKS;
6257 GROW;
6258 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006259 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006260 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006261 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006262 return(NULL);
6263 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006264 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006265 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006266 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006267 return(NULL);
6268 }
Owen Taylor3473f882001-02-23 17:55:21 +00006269 GROW;
6270 if (RAW == '?') {
6271 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6272 NEXT;
6273 } else if (RAW == '*') {
6274 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6275 NEXT;
6276 } else if (RAW == '+') {
6277 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6278 NEXT;
6279 } else {
6280 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6281 }
Owen Taylor3473f882001-02-23 17:55:21 +00006282 GROW;
6283 }
6284 SKIP_BLANKS;
6285 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006286 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006287 /*
6288 * Each loop we parse one separator and one element.
6289 */
6290 if (RAW == ',') {
6291 if (type == 0) type = CUR;
6292
6293 /*
6294 * Detect "Name | Name , Name" error
6295 */
6296 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006297 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006298 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006299 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006300 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006301 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006302 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006303 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006304 return(NULL);
6305 }
6306 NEXT;
6307
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006308 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006309 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006310 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006311 xmlFreeDocElementContent(ctxt->myDoc, last);
6312 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006313 return(NULL);
6314 }
6315 if (last == NULL) {
6316 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006317 if (ret != NULL)
6318 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006319 ret = cur = op;
6320 } else {
6321 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006322 if (op != NULL)
6323 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006324 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006325 if (last != NULL)
6326 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006327 cur =op;
6328 last = NULL;
6329 }
6330 } else if (RAW == '|') {
6331 if (type == 0) type = CUR;
6332
6333 /*
6334 * Detect "Name , Name | Name" error
6335 */
6336 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006337 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006338 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006339 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006340 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006341 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006342 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006343 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006344 return(NULL);
6345 }
6346 NEXT;
6347
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006348 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006349 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006350 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006351 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006352 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006353 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006354 return(NULL);
6355 }
6356 if (last == NULL) {
6357 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006358 if (ret != NULL)
6359 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006360 ret = cur = op;
6361 } else {
6362 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006363 if (op != NULL)
6364 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006365 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006366 if (last != NULL)
6367 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006368 cur =op;
6369 last = NULL;
6370 }
6371 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006372 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006373 if ((last != NULL) && (last != ret))
6374 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006375 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006376 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006377 return(NULL);
6378 }
6379 GROW;
6380 SKIP_BLANKS;
6381 GROW;
6382 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006383 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006384 /* Recurse on second child */
6385 NEXT;
6386 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006387 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6388 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006389 SKIP_BLANKS;
6390 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006391 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006392 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006393 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006394 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006395 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006396 return(NULL);
6397 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006398 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006399 if (last == NULL) {
6400 if (ret != NULL)
6401 xmlFreeDocElementContent(ctxt->myDoc, ret);
6402 return(NULL);
6403 }
Owen Taylor3473f882001-02-23 17:55:21 +00006404 if (RAW == '?') {
6405 last->ocur = XML_ELEMENT_CONTENT_OPT;
6406 NEXT;
6407 } else if (RAW == '*') {
6408 last->ocur = XML_ELEMENT_CONTENT_MULT;
6409 NEXT;
6410 } else if (RAW == '+') {
6411 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6412 NEXT;
6413 } else {
6414 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6415 }
6416 }
6417 SKIP_BLANKS;
6418 GROW;
6419 }
6420 if ((cur != NULL) && (last != NULL)) {
6421 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006422 if (last != NULL)
6423 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006424 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006425 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006426 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6427"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006428 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006429 }
Owen Taylor3473f882001-02-23 17:55:21 +00006430 NEXT;
6431 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006432 if (ret != NULL) {
6433 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6434 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6435 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6436 else
6437 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6438 }
Owen Taylor3473f882001-02-23 17:55:21 +00006439 NEXT;
6440 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006441 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006442 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006443 cur = ret;
6444 /*
6445 * Some normalization:
6446 * (a | b* | c?)* == (a | b | c)*
6447 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006448 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006449 if ((cur->c1 != NULL) &&
6450 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6451 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6452 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6453 if ((cur->c2 != NULL) &&
6454 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6456 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6457 cur = cur->c2;
6458 }
6459 }
Owen Taylor3473f882001-02-23 17:55:21 +00006460 NEXT;
6461 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006462 if (ret != NULL) {
6463 int found = 0;
6464
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006465 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6466 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6467 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006468 else
6469 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006470 /*
6471 * Some normalization:
6472 * (a | b*)+ == (a | b)*
6473 * (a | b?)+ == (a | b)*
6474 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006475 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006476 if ((cur->c1 != NULL) &&
6477 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6478 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6479 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6480 found = 1;
6481 }
6482 if ((cur->c2 != NULL) &&
6483 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6484 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6485 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6486 found = 1;
6487 }
6488 cur = cur->c2;
6489 }
6490 if (found)
6491 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6492 }
Owen Taylor3473f882001-02-23 17:55:21 +00006493 NEXT;
6494 }
6495 return(ret);
6496}
6497
6498/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006499 * xmlParseElementChildrenContentDecl:
6500 * @ctxt: an XML parser context
6501 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006502 *
6503 * parse the declaration for a Mixed Element content
6504 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6505 *
6506 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6507 *
6508 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6509 *
6510 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6511 *
6512 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6513 *
6514 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6515 * TODO Parameter-entity replacement text must be properly nested
6516 * with parenthesized groups. That is to say, if either of the
6517 * opening or closing parentheses in a choice, seq, or Mixed
6518 * construct is contained in the replacement text for a parameter
6519 * entity, both must be contained in the same replacement text. For
6520 * interoperability, if a parameter-entity reference appears in a
6521 * choice, seq, or Mixed construct, its replacement text should not
6522 * be empty, and neither the first nor last non-blank character of
6523 * the replacement text should be a connector (| or ,).
6524 *
6525 * Returns the tree of xmlElementContentPtr describing the element
6526 * hierarchy.
6527 */
6528xmlElementContentPtr
6529xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6530 /* stub left for API/ABI compat */
6531 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6532}
6533
6534/**
Owen Taylor3473f882001-02-23 17:55:21 +00006535 * xmlParseElementContentDecl:
6536 * @ctxt: an XML parser context
6537 * @name: the name of the element being defined.
6538 * @result: the Element Content pointer will be stored here if any
6539 *
6540 * parse the declaration for an Element content either Mixed or Children,
6541 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006542 *
Owen Taylor3473f882001-02-23 17:55:21 +00006543 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6544 *
6545 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6546 */
6547
6548int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006549xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006550 xmlElementContentPtr *result) {
6551
6552 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006553 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006554 int res;
6555
6556 *result = NULL;
6557
6558 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006559 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006560 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006561 return(-1);
6562 }
6563 NEXT;
6564 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006565 if (ctxt->instate == XML_PARSER_EOF)
6566 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006567 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006568 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006569 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006570 res = XML_ELEMENT_TYPE_MIXED;
6571 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006572 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006573 res = XML_ELEMENT_TYPE_ELEMENT;
6574 }
Owen Taylor3473f882001-02-23 17:55:21 +00006575 SKIP_BLANKS;
6576 *result = tree;
6577 return(res);
6578}
6579
6580/**
6581 * xmlParseElementDecl:
6582 * @ctxt: an XML parser context
6583 *
6584 * parse an Element declaration.
6585 *
6586 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6587 *
6588 * [ VC: Unique Element Type Declaration ]
6589 * No element type may be declared more than once
6590 *
6591 * Returns the type of the element, or -1 in case of error
6592 */
6593int
6594xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006595 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006596 int ret = -1;
6597 xmlElementContentPtr content = NULL;
6598
Daniel Veillard4c778d82005-01-23 17:37:44 +00006599 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006600 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006601 xmlParserInputPtr input = ctxt->input;
6602
6603 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006604 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006605 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6606 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006607 }
6608 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006609 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006610 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006611 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6612 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006613 return(-1);
6614 }
6615 while ((RAW == 0) && (ctxt->inputNr > 1))
6616 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006617 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006618 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6619 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006620 }
6621 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006622 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006623 SKIP(5);
6624 /*
6625 * Element must always be empty.
6626 */
6627 ret = XML_ELEMENT_TYPE_EMPTY;
6628 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6629 (NXT(2) == 'Y')) {
6630 SKIP(3);
6631 /*
6632 * Element is a generic container.
6633 */
6634 ret = XML_ELEMENT_TYPE_ANY;
6635 } else if (RAW == '(') {
6636 ret = xmlParseElementContentDecl(ctxt, name, &content);
6637 } else {
6638 /*
6639 * [ WFC: PEs in Internal Subset ] error handling.
6640 */
6641 if ((RAW == '%') && (ctxt->external == 0) &&
6642 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006643 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006644 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006645 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006646 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006647 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6648 }
Owen Taylor3473f882001-02-23 17:55:21 +00006649 return(-1);
6650 }
6651
6652 SKIP_BLANKS;
6653 /*
6654 * Pop-up of finished entities.
6655 */
6656 while ((RAW == 0) && (ctxt->inputNr > 1))
6657 xmlPopInput(ctxt);
6658 SKIP_BLANKS;
6659
6660 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006661 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006662 if (content != NULL) {
6663 xmlFreeDocElementContent(ctxt->myDoc, content);
6664 }
Owen Taylor3473f882001-02-23 17:55:21 +00006665 } else {
6666 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006667 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6668 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006669 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006670
Owen Taylor3473f882001-02-23 17:55:21 +00006671 NEXT;
6672 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006673 (ctxt->sax->elementDecl != NULL)) {
6674 if (content != NULL)
6675 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006676 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6677 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006678 if ((content != NULL) && (content->parent == NULL)) {
6679 /*
6680 * this is a trick: if xmlAddElementDecl is called,
6681 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006682 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006683 * interfaces or change the API/ABI
6684 */
6685 xmlFreeDocElementContent(ctxt->myDoc, content);
6686 }
6687 } else if (content != NULL) {
6688 xmlFreeDocElementContent(ctxt->myDoc, content);
6689 }
Owen Taylor3473f882001-02-23 17:55:21 +00006690 }
Owen Taylor3473f882001-02-23 17:55:21 +00006691 }
6692 return(ret);
6693}
6694
6695/**
Owen Taylor3473f882001-02-23 17:55:21 +00006696 * xmlParseConditionalSections
6697 * @ctxt: an XML parser context
6698 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006699 * [61] conditionalSect ::= includeSect | ignoreSect
6700 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006701 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6702 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6703 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6704 */
6705
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006706static void
Owen Taylor3473f882001-02-23 17:55:21 +00006707xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006708 int id = ctxt->input->id;
6709
Owen Taylor3473f882001-02-23 17:55:21 +00006710 SKIP(3);
6711 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006712 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006713 SKIP(7);
6714 SKIP_BLANKS;
6715 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006716 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006717 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006718 if (ctxt->input->id != id) {
6719 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6720 "All markup of the conditional section is not in the same entity\n",
6721 NULL, NULL);
6722 }
Owen Taylor3473f882001-02-23 17:55:21 +00006723 NEXT;
6724 }
6725 if (xmlParserDebugEntities) {
6726 if ((ctxt->input != NULL) && (ctxt->input->filename))
6727 xmlGenericError(xmlGenericErrorContext,
6728 "%s(%d): ", ctxt->input->filename,
6729 ctxt->input->line);
6730 xmlGenericError(xmlGenericErrorContext,
6731 "Entering INCLUDE Conditional Section\n");
6732 }
6733
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006734 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6735 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006736 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006737 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006738
6739 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6740 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006741 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006742 NEXT;
6743 } else if (RAW == '%') {
6744 xmlParsePEReference(ctxt);
6745 } else
6746 xmlParseMarkupDecl(ctxt);
6747
6748 /*
6749 * Pop-up of finished entities.
6750 */
6751 while ((RAW == 0) && (ctxt->inputNr > 1))
6752 xmlPopInput(ctxt);
6753
Daniel Veillardfdc91562002-07-01 21:52:03 +00006754 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006755 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006756 break;
6757 }
6758 }
6759 if (xmlParserDebugEntities) {
6760 if ((ctxt->input != NULL) && (ctxt->input->filename))
6761 xmlGenericError(xmlGenericErrorContext,
6762 "%s(%d): ", ctxt->input->filename,
6763 ctxt->input->line);
6764 xmlGenericError(xmlGenericErrorContext,
6765 "Leaving INCLUDE Conditional Section\n");
6766 }
6767
Daniel Veillarda07050d2003-10-19 14:46:32 +00006768 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006769 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006770 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006771 int depth = 0;
6772
6773 SKIP(6);
6774 SKIP_BLANKS;
6775 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006776 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006777 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006778 if (ctxt->input->id != id) {
6779 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6780 "All markup of the conditional section is not in the same entity\n",
6781 NULL, NULL);
6782 }
Owen Taylor3473f882001-02-23 17:55:21 +00006783 NEXT;
6784 }
6785 if (xmlParserDebugEntities) {
6786 if ((ctxt->input != NULL) && (ctxt->input->filename))
6787 xmlGenericError(xmlGenericErrorContext,
6788 "%s(%d): ", ctxt->input->filename,
6789 ctxt->input->line);
6790 xmlGenericError(xmlGenericErrorContext,
6791 "Entering IGNORE Conditional Section\n");
6792 }
6793
6794 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006795 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006796 * But disable SAX event generating DTD building in the meantime
6797 */
6798 state = ctxt->disableSAX;
6799 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006800 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006801 ctxt->instate = XML_PARSER_IGNORE;
6802
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006803 while (((depth >= 0) && (RAW != 0)) &&
6804 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006805 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6806 depth++;
6807 SKIP(3);
6808 continue;
6809 }
6810 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6811 if (--depth >= 0) SKIP(3);
6812 continue;
6813 }
6814 NEXT;
6815 continue;
6816 }
6817
6818 ctxt->disableSAX = state;
6819 ctxt->instate = instate;
6820
6821 if (xmlParserDebugEntities) {
6822 if ((ctxt->input != NULL) && (ctxt->input->filename))
6823 xmlGenericError(xmlGenericErrorContext,
6824 "%s(%d): ", ctxt->input->filename,
6825 ctxt->input->line);
6826 xmlGenericError(xmlGenericErrorContext,
6827 "Leaving IGNORE Conditional Section\n");
6828 }
6829
6830 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006831 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006832 }
6833
6834 if (RAW == 0)
6835 SHRINK;
6836
6837 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006838 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006839 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006840 if (ctxt->input->id != id) {
6841 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6842 "All markup of the conditional section is not in the same entity\n",
6843 NULL, NULL);
6844 }
Owen Taylor3473f882001-02-23 17:55:21 +00006845 SKIP(3);
6846 }
6847}
6848
6849/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006850 * xmlParseMarkupDecl:
6851 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006852 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006853 * parse Markup declarations
6854 *
6855 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6856 * NotationDecl | PI | Comment
6857 *
6858 * [ VC: Proper Declaration/PE Nesting ]
6859 * Parameter-entity replacement text must be properly nested with
6860 * markup declarations. That is to say, if either the first character
6861 * or the last character of a markup declaration (markupdecl above) is
6862 * contained in the replacement text for a parameter-entity reference,
6863 * both must be contained in the same replacement text.
6864 *
6865 * [ WFC: PEs in Internal Subset ]
6866 * In the internal DTD subset, parameter-entity references can occur
6867 * only where markup declarations can occur, not within markup declarations.
6868 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006869 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006870 */
6871void
6872xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6873 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006874 if (CUR == '<') {
6875 if (NXT(1) == '!') {
6876 switch (NXT(2)) {
6877 case 'E':
6878 if (NXT(3) == 'L')
6879 xmlParseElementDecl(ctxt);
6880 else if (NXT(3) == 'N')
6881 xmlParseEntityDecl(ctxt);
6882 break;
6883 case 'A':
6884 xmlParseAttributeListDecl(ctxt);
6885 break;
6886 case 'N':
6887 xmlParseNotationDecl(ctxt);
6888 break;
6889 case '-':
6890 xmlParseComment(ctxt);
6891 break;
6892 default:
6893 /* there is an error but it will be detected later */
6894 break;
6895 }
6896 } else if (NXT(1) == '?') {
6897 xmlParsePI(ctxt);
6898 }
6899 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006900 /*
6901 * This is only for internal subset. On external entities,
6902 * the replacement is done before parsing stage
6903 */
6904 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6905 xmlParsePEReference(ctxt);
6906
6907 /*
6908 * Conditional sections are allowed from entities included
6909 * by PE References in the internal subset.
6910 */
6911 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6912 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6913 xmlParseConditionalSections(ctxt);
6914 }
6915 }
6916
6917 ctxt->instate = XML_PARSER_DTD;
6918}
6919
6920/**
6921 * xmlParseTextDecl:
6922 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006923 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006924 * parse an XML declaration header for external entities
6925 *
6926 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006927 */
6928
6929void
6930xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6931 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006932 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006933
6934 /*
6935 * We know that '<?xml' is here.
6936 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006937 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006938 SKIP(5);
6939 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006940 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006941 return;
6942 }
6943
William M. Brack76e95df2003-10-18 16:20:14 +00006944 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6946 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006947 }
6948 SKIP_BLANKS;
6949
6950 /*
6951 * We may have the VersionInfo here.
6952 */
6953 version = xmlParseVersionInfo(ctxt);
6954 if (version == NULL)
6955 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006956 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006957 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006958 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6959 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006960 }
6961 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006962 ctxt->input->version = version;
6963
6964 /*
6965 * We must have the encoding declaration
6966 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006967 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006968 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6969 /*
6970 * The XML REC instructs us to stop parsing right here
6971 */
6972 return;
6973 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006974 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6975 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6976 "Missing encoding in text declaration\n");
6977 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006978
6979 SKIP_BLANKS;
6980 if ((RAW == '?') && (NXT(1) == '>')) {
6981 SKIP(2);
6982 } else if (RAW == '>') {
6983 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006984 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006985 NEXT;
6986 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006987 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006988 MOVETO_ENDTAG(CUR_PTR);
6989 NEXT;
6990 }
6991}
6992
6993/**
Owen Taylor3473f882001-02-23 17:55:21 +00006994 * xmlParseExternalSubset:
6995 * @ctxt: an XML parser context
6996 * @ExternalID: the external identifier
6997 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006998 *
Owen Taylor3473f882001-02-23 17:55:21 +00006999 * parse Markup declarations from an external subset
7000 *
7001 * [30] extSubset ::= textDecl? extSubsetDecl
7002 *
7003 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7004 */
7005void
7006xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7007 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007008 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007009 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007010
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007011 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007012 (ctxt->input->end - ctxt->input->cur >= 4)) {
7013 xmlChar start[4];
7014 xmlCharEncoding enc;
7015
7016 start[0] = RAW;
7017 start[1] = NXT(1);
7018 start[2] = NXT(2);
7019 start[3] = NXT(3);
7020 enc = xmlDetectCharEncoding(start, 4);
7021 if (enc != XML_CHAR_ENCODING_NONE)
7022 xmlSwitchEncoding(ctxt, enc);
7023 }
7024
Daniel Veillarda07050d2003-10-19 14:46:32 +00007025 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007026 xmlParseTextDecl(ctxt);
7027 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7028 /*
7029 * The XML REC instructs us to stop parsing right here
7030 */
7031 ctxt->instate = XML_PARSER_EOF;
7032 return;
7033 }
7034 }
7035 if (ctxt->myDoc == NULL) {
7036 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007037 if (ctxt->myDoc == NULL) {
7038 xmlErrMemory(ctxt, "New Doc failed");
7039 return;
7040 }
7041 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007042 }
7043 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7044 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7045
7046 ctxt->instate = XML_PARSER_DTD;
7047 ctxt->external = 1;
7048 while (((RAW == '<') && (NXT(1) == '?')) ||
7049 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007050 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007051 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007052 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007053
7054 GROW;
7055 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7056 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007057 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007058 NEXT;
7059 } else if (RAW == '%') {
7060 xmlParsePEReference(ctxt);
7061 } else
7062 xmlParseMarkupDecl(ctxt);
7063
7064 /*
7065 * Pop-up of finished entities.
7066 */
7067 while ((RAW == 0) && (ctxt->inputNr > 1))
7068 xmlPopInput(ctxt);
7069
Daniel Veillardfdc91562002-07-01 21:52:03 +00007070 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007071 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007072 break;
7073 }
7074 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007075
Owen Taylor3473f882001-02-23 17:55:21 +00007076 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007077 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007078 }
7079
7080}
7081
7082/**
7083 * xmlParseReference:
7084 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007085 *
Owen Taylor3473f882001-02-23 17:55:21 +00007086 * parse and handle entity references in content, depending on the SAX
7087 * interface, this may end-up in a call to character() if this is a
7088 * CharRef, a predefined entity, if there is no reference() callback.
7089 * or if the parser was asked to switch to that mode.
7090 *
7091 * [67] Reference ::= EntityRef | CharRef
7092 */
7093void
7094xmlParseReference(xmlParserCtxtPtr ctxt) {
7095 xmlEntityPtr ent;
7096 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007097 int was_checked;
7098 xmlNodePtr list = NULL;
7099 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007100
Daniel Veillard0161e632008-08-28 15:36:32 +00007101
7102 if (RAW != '&')
7103 return;
7104
7105 /*
7106 * Simple case of a CharRef
7107 */
Owen Taylor3473f882001-02-23 17:55:21 +00007108 if (NXT(1) == '#') {
7109 int i = 0;
7110 xmlChar out[10];
7111 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007112 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007113
Daniel Veillarddc171602008-03-26 17:41:38 +00007114 if (value == 0)
7115 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007116 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7117 /*
7118 * So we are using non-UTF-8 buffers
7119 * Check that the char fit on 8bits, if not
7120 * generate a CharRef.
7121 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007122 if (value <= 0xFF) {
7123 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007124 out[1] = 0;
7125 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7126 (!ctxt->disableSAX))
7127 ctxt->sax->characters(ctxt->userData, out, 1);
7128 } else {
7129 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007130 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007131 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007132 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007133 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7134 (!ctxt->disableSAX))
7135 ctxt->sax->reference(ctxt->userData, out);
7136 }
7137 } else {
7138 /*
7139 * Just encode the value in UTF-8
7140 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007141 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007142 out[i] = 0;
7143 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144 (!ctxt->disableSAX))
7145 ctxt->sax->characters(ctxt->userData, out, i);
7146 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007147 return;
7148 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007149
Daniel Veillard0161e632008-08-28 15:36:32 +00007150 /*
7151 * We are seeing an entity reference
7152 */
7153 ent = xmlParseEntityRef(ctxt);
7154 if (ent == NULL) return;
7155 if (!ctxt->wellFormed)
7156 return;
7157 was_checked = ent->checked;
7158
7159 /* special case of predefined entities */
7160 if ((ent->name == NULL) ||
7161 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7162 val = ent->content;
7163 if (val == NULL) return;
7164 /*
7165 * inline the entity.
7166 */
7167 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7168 (!ctxt->disableSAX))
7169 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7170 return;
7171 }
7172
7173 /*
7174 * The first reference to the entity trigger a parsing phase
7175 * where the ent->children is filled with the result from
7176 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007177 * Note: external parsed entities will not be loaded, it is not
7178 * required for a non-validating parser, unless the parsing option
7179 * of validating, or substituting entities were given. Doing so is
7180 * far more secure as the parser will only process data coming from
7181 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007182 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007183 if ((ent->checked == 0) &&
7184 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7185 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007186 unsigned long oldnbent = ctxt->nbentities;
7187
7188 /*
7189 * This is a bit hackish but this seems the best
7190 * way to make sure both SAX and DOM entity support
7191 * behaves okay.
7192 */
7193 void *user_data;
7194 if (ctxt->userData == ctxt)
7195 user_data = NULL;
7196 else
7197 user_data = ctxt->userData;
7198
7199 /*
7200 * Check that this entity is well formed
7201 * 4.3.2: An internal general parsed entity is well-formed
7202 * if its replacement text matches the production labeled
7203 * content.
7204 */
7205 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7206 ctxt->depth++;
7207 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7208 user_data, &list);
7209 ctxt->depth--;
7210
7211 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7212 ctxt->depth++;
7213 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7214 user_data, ctxt->depth, ent->URI,
7215 ent->ExternalID, &list);
7216 ctxt->depth--;
7217 } else {
7218 ret = XML_ERR_ENTITY_PE_INTERNAL;
7219 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7220 "invalid entity type found\n", NULL);
7221 }
7222
7223 /*
7224 * Store the number of entities needing parsing for this entity
7225 * content and do checkings
7226 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007227 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7228 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7229 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007230 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007231 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007232 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007233 return;
7234 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007235 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007236 xmlFreeNodeList(list);
7237 return;
7238 }
Owen Taylor3473f882001-02-23 17:55:21 +00007239
Daniel Veillard0161e632008-08-28 15:36:32 +00007240 if ((ret == XML_ERR_OK) && (list != NULL)) {
7241 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7242 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7243 (ent->children == NULL)) {
7244 ent->children = list;
7245 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007246 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007247 * Prune it directly in the generated document
7248 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007249 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007250 if (((list->type == XML_TEXT_NODE) &&
7251 (list->next == NULL)) ||
7252 (ctxt->parseMode == XML_PARSE_READER)) {
7253 list->parent = (xmlNodePtr) ent;
7254 list = NULL;
7255 ent->owner = 1;
7256 } else {
7257 ent->owner = 0;
7258 while (list != NULL) {
7259 list->parent = (xmlNodePtr) ctxt->node;
7260 list->doc = ctxt->myDoc;
7261 if (list->next == NULL)
7262 ent->last = list;
7263 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007264 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007265 list = ent->children;
7266#ifdef LIBXML_LEGACY_ENABLED
7267 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7268 xmlAddEntityReference(ent, list, NULL);
7269#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007270 }
7271 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007272 ent->owner = 1;
7273 while (list != NULL) {
7274 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007275 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007276 if (list->next == NULL)
7277 ent->last = list;
7278 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007279 }
7280 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007281 } else {
7282 xmlFreeNodeList(list);
7283 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007284 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007285 } else if ((ret != XML_ERR_OK) &&
7286 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7287 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7288 "Entity '%s' failed to parse\n", ent->name);
7289 } else if (list != NULL) {
7290 xmlFreeNodeList(list);
7291 list = NULL;
7292 }
7293 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007294 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007295 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007296 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007297 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007298
Daniel Veillard0161e632008-08-28 15:36:32 +00007299 /*
7300 * Now that the entity content has been gathered
7301 * provide it to the application, this can take different forms based
7302 * on the parsing modes.
7303 */
7304 if (ent->children == NULL) {
7305 /*
7306 * Probably running in SAX mode and the callbacks don't
7307 * build the entity content. So unless we already went
7308 * though parsing for first checking go though the entity
7309 * content to generate callbacks associated to the entity
7310 */
7311 if (was_checked != 0) {
7312 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007313 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007314 * This is a bit hackish but this seems the best
7315 * way to make sure both SAX and DOM entity support
7316 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007317 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007318 if (ctxt->userData == ctxt)
7319 user_data = NULL;
7320 else
7321 user_data = ctxt->userData;
7322
7323 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7324 ctxt->depth++;
7325 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7326 ent->content, user_data, NULL);
7327 ctxt->depth--;
7328 } else if (ent->etype ==
7329 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7330 ctxt->depth++;
7331 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7332 ctxt->sax, user_data, ctxt->depth,
7333 ent->URI, ent->ExternalID, NULL);
7334 ctxt->depth--;
7335 } else {
7336 ret = XML_ERR_ENTITY_PE_INTERNAL;
7337 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7338 "invalid entity type found\n", NULL);
7339 }
7340 if (ret == XML_ERR_ENTITY_LOOP) {
7341 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7342 return;
7343 }
7344 }
7345 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7346 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7347 /*
7348 * Entity reference callback comes second, it's somewhat
7349 * superfluous but a compatibility to historical behaviour
7350 */
7351 ctxt->sax->reference(ctxt->userData, ent->name);
7352 }
7353 return;
7354 }
7355
7356 /*
7357 * If we didn't get any children for the entity being built
7358 */
7359 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7360 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7361 /*
7362 * Create a node.
7363 */
7364 ctxt->sax->reference(ctxt->userData, ent->name);
7365 return;
7366 }
7367
7368 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7369 /*
7370 * There is a problem on the handling of _private for entities
7371 * (bug 155816): Should we copy the content of the field from
7372 * the entity (possibly overwriting some value set by the user
7373 * when a copy is created), should we leave it alone, or should
7374 * we try to take care of different situations? The problem
7375 * is exacerbated by the usage of this field by the xmlReader.
7376 * To fix this bug, we look at _private on the created node
7377 * and, if it's NULL, we copy in whatever was in the entity.
7378 * If it's not NULL we leave it alone. This is somewhat of a
7379 * hack - maybe we should have further tests to determine
7380 * what to do.
7381 */
7382 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7383 /*
7384 * Seems we are generating the DOM content, do
7385 * a simple tree copy for all references except the first
7386 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007387 */
7388 if (((list == NULL) && (ent->owner == 0)) ||
7389 (ctxt->parseMode == XML_PARSE_READER)) {
7390 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7391
7392 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007393 * We are copying here, make sure there is no abuse
7394 */
7395 ctxt->sizeentcopy += ent->length;
7396 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7397 return;
7398
7399 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007400 * when operating on a reader, the entities definitions
7401 * are always owning the entities subtree.
7402 if (ctxt->parseMode == XML_PARSE_READER)
7403 ent->owner = 1;
7404 */
7405
7406 cur = ent->children;
7407 while (cur != NULL) {
7408 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7409 if (nw != NULL) {
7410 if (nw->_private == NULL)
7411 nw->_private = cur->_private;
7412 if (firstChild == NULL){
7413 firstChild = nw;
7414 }
7415 nw = xmlAddChild(ctxt->node, nw);
7416 }
7417 if (cur == ent->last) {
7418 /*
7419 * needed to detect some strange empty
7420 * node cases in the reader tests
7421 */
7422 if ((ctxt->parseMode == XML_PARSE_READER) &&
7423 (nw != NULL) &&
7424 (nw->type == XML_ELEMENT_NODE) &&
7425 (nw->children == NULL))
7426 nw->extra = 1;
7427
7428 break;
7429 }
7430 cur = cur->next;
7431 }
7432#ifdef LIBXML_LEGACY_ENABLED
7433 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7434 xmlAddEntityReference(ent, firstChild, nw);
7435#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007436 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007437 xmlNodePtr nw = NULL, cur, next, last,
7438 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007439
7440 /*
7441 * We are copying here, make sure there is no abuse
7442 */
7443 ctxt->sizeentcopy += ent->length;
7444 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7445 return;
7446
Daniel Veillard0161e632008-08-28 15:36:32 +00007447 /*
7448 * Copy the entity child list and make it the new
7449 * entity child list. The goal is to make sure any
7450 * ID or REF referenced will be the one from the
7451 * document content and not the entity copy.
7452 */
7453 cur = ent->children;
7454 ent->children = NULL;
7455 last = ent->last;
7456 ent->last = NULL;
7457 while (cur != NULL) {
7458 next = cur->next;
7459 cur->next = NULL;
7460 cur->parent = NULL;
7461 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7462 if (nw != NULL) {
7463 if (nw->_private == NULL)
7464 nw->_private = cur->_private;
7465 if (firstChild == NULL){
7466 firstChild = cur;
7467 }
7468 xmlAddChild((xmlNodePtr) ent, nw);
7469 xmlAddChild(ctxt->node, cur);
7470 }
7471 if (cur == last)
7472 break;
7473 cur = next;
7474 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007475 if (ent->owner == 0)
7476 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007477#ifdef LIBXML_LEGACY_ENABLED
7478 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7479 xmlAddEntityReference(ent, firstChild, nw);
7480#endif /* LIBXML_LEGACY_ENABLED */
7481 } else {
7482 const xmlChar *nbktext;
7483
7484 /*
7485 * the name change is to avoid coalescing of the
7486 * node with a possible previous text one which
7487 * would make ent->children a dangling pointer
7488 */
7489 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7490 -1);
7491 if (ent->children->type == XML_TEXT_NODE)
7492 ent->children->name = nbktext;
7493 if ((ent->last != ent->children) &&
7494 (ent->last->type == XML_TEXT_NODE))
7495 ent->last->name = nbktext;
7496 xmlAddChildList(ctxt->node, ent->children);
7497 }
7498
7499 /*
7500 * This is to avoid a nasty side effect, see
7501 * characters() in SAX.c
7502 */
7503 ctxt->nodemem = 0;
7504 ctxt->nodelen = 0;
7505 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007506 }
7507 }
7508}
7509
7510/**
7511 * xmlParseEntityRef:
7512 * @ctxt: an XML parser context
7513 *
7514 * parse ENTITY references declarations
7515 *
7516 * [68] EntityRef ::= '&' Name ';'
7517 *
7518 * [ WFC: Entity Declared ]
7519 * In a document without any DTD, a document with only an internal DTD
7520 * subset which contains no parameter entity references, or a document
7521 * with "standalone='yes'", the Name given in the entity reference
7522 * must match that in an entity declaration, except that well-formed
7523 * documents need not declare any of the following entities: amp, lt,
7524 * gt, apos, quot. The declaration of a parameter entity must precede
7525 * any reference to it. Similarly, the declaration of a general entity
7526 * must precede any reference to it which appears in a default value in an
7527 * attribute-list declaration. Note that if entities are declared in the
7528 * external subset or in external parameter entities, a non-validating
7529 * processor is not obligated to read and process their declarations;
7530 * for such documents, the rule that an entity must be declared is a
7531 * well-formedness constraint only if standalone='yes'.
7532 *
7533 * [ WFC: Parsed Entity ]
7534 * An entity reference must not contain the name of an unparsed entity
7535 *
7536 * Returns the xmlEntityPtr if found, or NULL otherwise.
7537 */
7538xmlEntityPtr
7539xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007540 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007541 xmlEntityPtr ent = NULL;
7542
7543 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007544 if (ctxt->instate == XML_PARSER_EOF)
7545 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007546
Daniel Veillard0161e632008-08-28 15:36:32 +00007547 if (RAW != '&')
7548 return(NULL);
7549 NEXT;
7550 name = xmlParseName(ctxt);
7551 if (name == NULL) {
7552 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7553 "xmlParseEntityRef: no name\n");
7554 return(NULL);
7555 }
7556 if (RAW != ';') {
7557 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7558 return(NULL);
7559 }
7560 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007561
Daniel Veillard0161e632008-08-28 15:36:32 +00007562 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007563 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007564 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007565 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7566 ent = xmlGetPredefinedEntity(name);
7567 if (ent != NULL)
7568 return(ent);
7569 }
Owen Taylor3473f882001-02-23 17:55:21 +00007570
Daniel Veillard0161e632008-08-28 15:36:32 +00007571 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007572 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007573 */
7574 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007575
Daniel Veillard0161e632008-08-28 15:36:32 +00007576 /*
7577 * Ask first SAX for entity resolution, otherwise try the
7578 * entities which may have stored in the parser context.
7579 */
7580 if (ctxt->sax != NULL) {
7581 if (ctxt->sax->getEntity != NULL)
7582 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007583 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007584 (ctxt->options & XML_PARSE_OLDSAX))
7585 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007586 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7587 (ctxt->userData==ctxt)) {
7588 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007589 }
7590 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007591 if (ctxt->instate == XML_PARSER_EOF)
7592 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007593 /*
7594 * [ WFC: Entity Declared ]
7595 * In a document without any DTD, a document with only an
7596 * internal DTD subset which contains no parameter entity
7597 * references, or a document with "standalone='yes'", the
7598 * Name given in the entity reference must match that in an
7599 * entity declaration, except that well-formed documents
7600 * need not declare any of the following entities: amp, lt,
7601 * gt, apos, quot.
7602 * The declaration of a parameter entity must precede any
7603 * reference to it.
7604 * Similarly, the declaration of a general entity must
7605 * precede any reference to it which appears in a default
7606 * value in an attribute-list declaration. Note that if
7607 * entities are declared in the external subset or in
7608 * external parameter entities, a non-validating processor
7609 * is not obligated to read and process their declarations;
7610 * for such documents, the rule that an entity must be
7611 * declared is a well-formedness constraint only if
7612 * standalone='yes'.
7613 */
7614 if (ent == NULL) {
7615 if ((ctxt->standalone == 1) ||
7616 ((ctxt->hasExternalSubset == 0) &&
7617 (ctxt->hasPErefs == 0))) {
7618 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7619 "Entity '%s' not defined\n", name);
7620 } else {
7621 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7622 "Entity '%s' not defined\n", name);
7623 if ((ctxt->inSubset == 0) &&
7624 (ctxt->sax != NULL) &&
7625 (ctxt->sax->reference != NULL)) {
7626 ctxt->sax->reference(ctxt->userData, name);
7627 }
7628 }
7629 ctxt->valid = 0;
7630 }
7631
7632 /*
7633 * [ WFC: Parsed Entity ]
7634 * An entity reference must not contain the name of an
7635 * unparsed entity
7636 */
7637 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7638 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7639 "Entity reference to unparsed entity %s\n", name);
7640 }
7641
7642 /*
7643 * [ WFC: No External Entity References ]
7644 * Attribute values cannot contain direct or indirect
7645 * entity references to external entities.
7646 */
7647 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7648 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7649 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7650 "Attribute references external entity '%s'\n", name);
7651 }
7652 /*
7653 * [ WFC: No < in Attribute Values ]
7654 * The replacement text of any entity referred to directly or
7655 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007656 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007657 */
7658 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007659 (ent != NULL) &&
7660 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7661 if ((ent->checked & 1) || ((ent->checked == 0) &&
7662 (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7663 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7664 "'<' in entity '%s' is not allowed in attributes values\n", name);
7665 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007666 }
7667
7668 /*
7669 * Internal check, no parameter entities here ...
7670 */
7671 else {
7672 switch (ent->etype) {
7673 case XML_INTERNAL_PARAMETER_ENTITY:
7674 case XML_EXTERNAL_PARAMETER_ENTITY:
7675 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7676 "Attempt to reference the parameter entity '%s'\n",
7677 name);
7678 break;
7679 default:
7680 break;
7681 }
7682 }
7683
7684 /*
7685 * [ WFC: No Recursion ]
7686 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007687 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007688 * Done somewhere else
7689 */
Owen Taylor3473f882001-02-23 17:55:21 +00007690 return(ent);
7691}
7692
7693/**
7694 * xmlParseStringEntityRef:
7695 * @ctxt: an XML parser context
7696 * @str: a pointer to an index in the string
7697 *
7698 * parse ENTITY references declarations, but this version parses it from
7699 * a string value.
7700 *
7701 * [68] EntityRef ::= '&' Name ';'
7702 *
7703 * [ WFC: Entity Declared ]
7704 * In a document without any DTD, a document with only an internal DTD
7705 * subset which contains no parameter entity references, or a document
7706 * with "standalone='yes'", the Name given in the entity reference
7707 * must match that in an entity declaration, except that well-formed
7708 * documents need not declare any of the following entities: amp, lt,
7709 * gt, apos, quot. The declaration of a parameter entity must precede
7710 * any reference to it. Similarly, the declaration of a general entity
7711 * must precede any reference to it which appears in a default value in an
7712 * attribute-list declaration. Note that if entities are declared in the
7713 * external subset or in external parameter entities, a non-validating
7714 * processor is not obligated to read and process their declarations;
7715 * for such documents, the rule that an entity must be declared is a
7716 * well-formedness constraint only if standalone='yes'.
7717 *
7718 * [ WFC: Parsed Entity ]
7719 * An entity reference must not contain the name of an unparsed entity
7720 *
7721 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7722 * is updated to the current location in the string.
7723 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007724static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007725xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7726 xmlChar *name;
7727 const xmlChar *ptr;
7728 xmlChar cur;
7729 xmlEntityPtr ent = NULL;
7730
7731 if ((str == NULL) || (*str == NULL))
7732 return(NULL);
7733 ptr = *str;
7734 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007735 if (cur != '&')
7736 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007737
Daniel Veillard0161e632008-08-28 15:36:32 +00007738 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007739 name = xmlParseStringName(ctxt, &ptr);
7740 if (name == NULL) {
7741 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7742 "xmlParseStringEntityRef: no name\n");
7743 *str = ptr;
7744 return(NULL);
7745 }
7746 if (*ptr != ';') {
7747 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007748 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007749 *str = ptr;
7750 return(NULL);
7751 }
7752 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007753
Owen Taylor3473f882001-02-23 17:55:21 +00007754
Daniel Veillard0161e632008-08-28 15:36:32 +00007755 /*
7756 * Predefined entites override any extra definition
7757 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007758 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7759 ent = xmlGetPredefinedEntity(name);
7760 if (ent != NULL) {
7761 xmlFree(name);
7762 *str = ptr;
7763 return(ent);
7764 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007765 }
Owen Taylor3473f882001-02-23 17:55:21 +00007766
Daniel Veillard0161e632008-08-28 15:36:32 +00007767 /*
7768 * Increate the number of entity references parsed
7769 */
7770 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007771
Daniel Veillard0161e632008-08-28 15:36:32 +00007772 /*
7773 * Ask first SAX for entity resolution, otherwise try the
7774 * entities which may have stored in the parser context.
7775 */
7776 if (ctxt->sax != NULL) {
7777 if (ctxt->sax->getEntity != NULL)
7778 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007779 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7780 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007781 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7782 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007783 }
7784 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007785 if (ctxt->instate == XML_PARSER_EOF) {
7786 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007787 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007788 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007789
7790 /*
7791 * [ WFC: Entity Declared ]
7792 * In a document without any DTD, a document with only an
7793 * internal DTD subset which contains no parameter entity
7794 * references, or a document with "standalone='yes'", the
7795 * Name given in the entity reference must match that in an
7796 * entity declaration, except that well-formed documents
7797 * need not declare any of the following entities: amp, lt,
7798 * gt, apos, quot.
7799 * The declaration of a parameter entity must precede any
7800 * reference to it.
7801 * Similarly, the declaration of a general entity must
7802 * precede any reference to it which appears in a default
7803 * value in an attribute-list declaration. Note that if
7804 * entities are declared in the external subset or in
7805 * external parameter entities, a non-validating processor
7806 * is not obligated to read and process their declarations;
7807 * for such documents, the rule that an entity must be
7808 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007809 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007810 */
7811 if (ent == NULL) {
7812 if ((ctxt->standalone == 1) ||
7813 ((ctxt->hasExternalSubset == 0) &&
7814 (ctxt->hasPErefs == 0))) {
7815 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7816 "Entity '%s' not defined\n", name);
7817 } else {
7818 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7819 "Entity '%s' not defined\n",
7820 name);
7821 }
7822 /* TODO ? check regressions ctxt->valid = 0; */
7823 }
7824
7825 /*
7826 * [ WFC: Parsed Entity ]
7827 * An entity reference must not contain the name of an
7828 * unparsed entity
7829 */
7830 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7831 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7832 "Entity reference to unparsed entity %s\n", name);
7833 }
7834
7835 /*
7836 * [ WFC: No External Entity References ]
7837 * Attribute values cannot contain direct or indirect
7838 * entity references to external entities.
7839 */
7840 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7841 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7842 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7843 "Attribute references external entity '%s'\n", name);
7844 }
7845 /*
7846 * [ WFC: No < in Attribute Values ]
7847 * The replacement text of any entity referred to directly or
7848 * indirectly in an attribute value (other than "&lt;") must
7849 * not contain a <.
7850 */
7851 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7852 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007853 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007854 (xmlStrchr(ent->content, '<'))) {
7855 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7856 "'<' in entity '%s' is not allowed in attributes values\n",
7857 name);
7858 }
7859
7860 /*
7861 * Internal check, no parameter entities here ...
7862 */
7863 else {
7864 switch (ent->etype) {
7865 case XML_INTERNAL_PARAMETER_ENTITY:
7866 case XML_EXTERNAL_PARAMETER_ENTITY:
7867 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7868 "Attempt to reference the parameter entity '%s'\n",
7869 name);
7870 break;
7871 default:
7872 break;
7873 }
7874 }
7875
7876 /*
7877 * [ WFC: No Recursion ]
7878 * A parsed entity must not contain a recursive reference
7879 * to itself, either directly or indirectly.
7880 * Done somewhere else
7881 */
7882
7883 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007884 *str = ptr;
7885 return(ent);
7886}
7887
7888/**
7889 * xmlParsePEReference:
7890 * @ctxt: an XML parser context
7891 *
7892 * parse PEReference declarations
7893 * The entity content is handled directly by pushing it's content as
7894 * a new input stream.
7895 *
7896 * [69] PEReference ::= '%' Name ';'
7897 *
7898 * [ WFC: No Recursion ]
7899 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007900 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007901 *
7902 * [ WFC: Entity Declared ]
7903 * In a document without any DTD, a document with only an internal DTD
7904 * subset which contains no parameter entity references, or a document
7905 * with "standalone='yes'", ... ... The declaration of a parameter
7906 * entity must precede any reference to it...
7907 *
7908 * [ VC: Entity Declared ]
7909 * In a document with an external subset or external parameter entities
7910 * with "standalone='no'", ... ... The declaration of a parameter entity
7911 * must precede any reference to it...
7912 *
7913 * [ WFC: In DTD ]
7914 * Parameter-entity references may only appear in the DTD.
7915 * NOTE: misleading but this is handled.
7916 */
7917void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007918xmlParsePEReference(xmlParserCtxtPtr ctxt)
7919{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007920 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007921 xmlEntityPtr entity = NULL;
7922 xmlParserInputPtr input;
7923
Daniel Veillard0161e632008-08-28 15:36:32 +00007924 if (RAW != '%')
7925 return;
7926 NEXT;
7927 name = xmlParseName(ctxt);
7928 if (name == NULL) {
7929 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7930 "xmlParsePEReference: no name\n");
7931 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007932 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007933 if (RAW != ';') {
7934 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7935 return;
7936 }
7937
7938 NEXT;
7939
7940 /*
7941 * Increate the number of entity references parsed
7942 */
7943 ctxt->nbentities++;
7944
7945 /*
7946 * Request the entity from SAX
7947 */
7948 if ((ctxt->sax != NULL) &&
7949 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007950 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7951 if (ctxt->instate == XML_PARSER_EOF)
7952 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007953 if (entity == NULL) {
7954 /*
7955 * [ WFC: Entity Declared ]
7956 * In a document without any DTD, a document with only an
7957 * internal DTD subset which contains no parameter entity
7958 * references, or a document with "standalone='yes'", ...
7959 * ... The declaration of a parameter entity must precede
7960 * any reference to it...
7961 */
7962 if ((ctxt->standalone == 1) ||
7963 ((ctxt->hasExternalSubset == 0) &&
7964 (ctxt->hasPErefs == 0))) {
7965 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7966 "PEReference: %%%s; not found\n",
7967 name);
7968 } else {
7969 /*
7970 * [ VC: Entity Declared ]
7971 * In a document with an external subset or external
7972 * parameter entities with "standalone='no'", ...
7973 * ... The declaration of a parameter entity must
7974 * precede any reference to it...
7975 */
7976 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7977 "PEReference: %%%s; not found\n",
7978 name, NULL);
7979 ctxt->valid = 0;
7980 }
7981 } else {
7982 /*
7983 * Internal checking in case the entity quest barfed
7984 */
7985 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7986 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7987 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7988 "Internal: %%%s; is not a parameter entity\n",
7989 name, NULL);
7990 } else if (ctxt->input->free != deallocblankswrapper) {
7991 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7992 if (xmlPushInput(ctxt, input) < 0)
7993 return;
7994 } else {
7995 /*
7996 * TODO !!!
7997 * handle the extra spaces added before and after
7998 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7999 */
8000 input = xmlNewEntityInputStream(ctxt, entity);
8001 if (xmlPushInput(ctxt, input) < 0)
8002 return;
8003 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8004 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8005 (IS_BLANK_CH(NXT(5)))) {
8006 xmlParseTextDecl(ctxt);
8007 if (ctxt->errNo ==
8008 XML_ERR_UNSUPPORTED_ENCODING) {
8009 /*
8010 * The XML REC instructs us to stop parsing
8011 * right here
8012 */
8013 ctxt->instate = XML_PARSER_EOF;
8014 return;
8015 }
8016 }
8017 }
8018 }
8019 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008020}
8021
8022/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008023 * xmlLoadEntityContent:
8024 * @ctxt: an XML parser context
8025 * @entity: an unloaded system entity
8026 *
8027 * Load the original content of the given system entity from the
8028 * ExternalID/SystemID given. This is to be used for Included in Literal
8029 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8030 *
8031 * Returns 0 in case of success and -1 in case of failure
8032 */
8033static int
8034xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8035 xmlParserInputPtr input;
8036 xmlBufferPtr buf;
8037 int l, c;
8038 int count = 0;
8039
8040 if ((ctxt == NULL) || (entity == NULL) ||
8041 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8042 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8043 (entity->content != NULL)) {
8044 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8045 "xmlLoadEntityContent parameter error");
8046 return(-1);
8047 }
8048
8049 if (xmlParserDebugEntities)
8050 xmlGenericError(xmlGenericErrorContext,
8051 "Reading %s entity content input\n", entity->name);
8052
8053 buf = xmlBufferCreate();
8054 if (buf == NULL) {
8055 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8056 "xmlLoadEntityContent parameter error");
8057 return(-1);
8058 }
8059
8060 input = xmlNewEntityInputStream(ctxt, entity);
8061 if (input == NULL) {
8062 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8063 "xmlLoadEntityContent input error");
8064 xmlBufferFree(buf);
8065 return(-1);
8066 }
8067
8068 /*
8069 * Push the entity as the current input, read char by char
8070 * saving to the buffer until the end of the entity or an error
8071 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008072 if (xmlPushInput(ctxt, input) < 0) {
8073 xmlBufferFree(buf);
8074 return(-1);
8075 }
8076
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008077 GROW;
8078 c = CUR_CHAR(l);
8079 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8080 (IS_CHAR(c))) {
8081 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008082 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008083 count = 0;
8084 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008085 if (ctxt->instate == XML_PARSER_EOF) {
8086 xmlBufferFree(buf);
8087 return(-1);
8088 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008089 }
8090 NEXTL(l);
8091 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008092 if (c == 0) {
8093 count = 0;
8094 GROW;
8095 if (ctxt->instate == XML_PARSER_EOF) {
8096 xmlBufferFree(buf);
8097 return(-1);
8098 }
8099 c = CUR_CHAR(l);
8100 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008101 }
8102
8103 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8104 xmlPopInput(ctxt);
8105 } else if (!IS_CHAR(c)) {
8106 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8107 "xmlLoadEntityContent: invalid char value %d\n",
8108 c);
8109 xmlBufferFree(buf);
8110 return(-1);
8111 }
8112 entity->content = buf->content;
8113 buf->content = NULL;
8114 xmlBufferFree(buf);
8115
8116 return(0);
8117}
8118
8119/**
Owen Taylor3473f882001-02-23 17:55:21 +00008120 * xmlParseStringPEReference:
8121 * @ctxt: an XML parser context
8122 * @str: a pointer to an index in the string
8123 *
8124 * parse PEReference declarations
8125 *
8126 * [69] PEReference ::= '%' Name ';'
8127 *
8128 * [ WFC: No Recursion ]
8129 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008130 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008131 *
8132 * [ WFC: Entity Declared ]
8133 * In a document without any DTD, a document with only an internal DTD
8134 * subset which contains no parameter entity references, or a document
8135 * with "standalone='yes'", ... ... The declaration of a parameter
8136 * entity must precede any reference to it...
8137 *
8138 * [ VC: Entity Declared ]
8139 * In a document with an external subset or external parameter entities
8140 * with "standalone='no'", ... ... The declaration of a parameter entity
8141 * must precede any reference to it...
8142 *
8143 * [ WFC: In DTD ]
8144 * Parameter-entity references may only appear in the DTD.
8145 * NOTE: misleading but this is handled.
8146 *
8147 * Returns the string of the entity content.
8148 * str is updated to the current value of the index
8149 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008150static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008151xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8152 const xmlChar *ptr;
8153 xmlChar cur;
8154 xmlChar *name;
8155 xmlEntityPtr entity = NULL;
8156
8157 if ((str == NULL) || (*str == NULL)) return(NULL);
8158 ptr = *str;
8159 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008160 if (cur != '%')
8161 return(NULL);
8162 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008163 name = xmlParseStringName(ctxt, &ptr);
8164 if (name == NULL) {
8165 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8166 "xmlParseStringPEReference: no name\n");
8167 *str = ptr;
8168 return(NULL);
8169 }
8170 cur = *ptr;
8171 if (cur != ';') {
8172 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8173 xmlFree(name);
8174 *str = ptr;
8175 return(NULL);
8176 }
8177 ptr++;
8178
8179 /*
8180 * Increate the number of entity references parsed
8181 */
8182 ctxt->nbentities++;
8183
8184 /*
8185 * Request the entity from SAX
8186 */
8187 if ((ctxt->sax != NULL) &&
8188 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008189 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8190 if (ctxt->instate == XML_PARSER_EOF) {
8191 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008192 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008193 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008194 if (entity == NULL) {
8195 /*
8196 * [ WFC: Entity Declared ]
8197 * In a document without any DTD, a document with only an
8198 * internal DTD subset which contains no parameter entity
8199 * references, or a document with "standalone='yes'", ...
8200 * ... The declaration of a parameter entity must precede
8201 * any reference to it...
8202 */
8203 if ((ctxt->standalone == 1) ||
8204 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8205 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8206 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008207 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008208 /*
8209 * [ VC: Entity Declared ]
8210 * In a document with an external subset or external
8211 * parameter entities with "standalone='no'", ...
8212 * ... The declaration of a parameter entity must
8213 * precede any reference to it...
8214 */
8215 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8216 "PEReference: %%%s; not found\n",
8217 name, NULL);
8218 ctxt->valid = 0;
8219 }
8220 } else {
8221 /*
8222 * Internal checking in case the entity quest barfed
8223 */
8224 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8225 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8226 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8227 "%%%s; is not a parameter entity\n",
8228 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008229 }
8230 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008231 ctxt->hasPErefs = 1;
8232 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008233 *str = ptr;
8234 return(entity);
8235}
8236
8237/**
8238 * xmlParseDocTypeDecl:
8239 * @ctxt: an XML parser context
8240 *
8241 * parse a DOCTYPE declaration
8242 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008243 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008244 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8245 *
8246 * [ VC: Root Element Type ]
8247 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008248 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008249 */
8250
8251void
8252xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008253 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008254 xmlChar *ExternalID = NULL;
8255 xmlChar *URI = NULL;
8256
8257 /*
8258 * We know that '<!DOCTYPE' has been detected.
8259 */
8260 SKIP(9);
8261
8262 SKIP_BLANKS;
8263
8264 /*
8265 * Parse the DOCTYPE name.
8266 */
8267 name = xmlParseName(ctxt);
8268 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008269 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8270 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008271 }
8272 ctxt->intSubName = name;
8273
8274 SKIP_BLANKS;
8275
8276 /*
8277 * Check for SystemID and ExternalID
8278 */
8279 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8280
8281 if ((URI != NULL) || (ExternalID != NULL)) {
8282 ctxt->hasExternalSubset = 1;
8283 }
8284 ctxt->extSubURI = URI;
8285 ctxt->extSubSystem = ExternalID;
8286
8287 SKIP_BLANKS;
8288
8289 /*
8290 * Create and update the internal subset.
8291 */
8292 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8293 (!ctxt->disableSAX))
8294 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008295 if (ctxt->instate == XML_PARSER_EOF)
8296 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008297
8298 /*
8299 * Is there any internal subset declarations ?
8300 * they are handled separately in xmlParseInternalSubset()
8301 */
8302 if (RAW == '[')
8303 return;
8304
8305 /*
8306 * We should be at the end of the DOCTYPE declaration.
8307 */
8308 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008309 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008310 }
8311 NEXT;
8312}
8313
8314/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008315 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008316 * @ctxt: an XML parser context
8317 *
8318 * parse the internal subset declaration
8319 *
8320 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8321 */
8322
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008323static void
Owen Taylor3473f882001-02-23 17:55:21 +00008324xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8325 /*
8326 * Is there any DTD definition ?
8327 */
8328 if (RAW == '[') {
8329 ctxt->instate = XML_PARSER_DTD;
8330 NEXT;
8331 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008332 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008333 * PEReferences.
8334 * Subsequence (markupdecl | PEReference | S)*
8335 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008336 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008337 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008338 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008339
8340 SKIP_BLANKS;
8341 xmlParseMarkupDecl(ctxt);
8342 xmlParsePEReference(ctxt);
8343
8344 /*
8345 * Pop-up of finished entities.
8346 */
8347 while ((RAW == 0) && (ctxt->inputNr > 1))
8348 xmlPopInput(ctxt);
8349
8350 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008351 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008352 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008353 break;
8354 }
8355 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008356 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008357 NEXT;
8358 SKIP_BLANKS;
8359 }
8360 }
8361
8362 /*
8363 * We should be at the end of the DOCTYPE declaration.
8364 */
8365 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008366 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008367 }
8368 NEXT;
8369}
8370
Daniel Veillard81273902003-09-30 00:43:48 +00008371#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008372/**
8373 * xmlParseAttribute:
8374 * @ctxt: an XML parser context
8375 * @value: a xmlChar ** used to store the value of the attribute
8376 *
8377 * parse an attribute
8378 *
8379 * [41] Attribute ::= Name Eq AttValue
8380 *
8381 * [ WFC: No External Entity References ]
8382 * Attribute values cannot contain direct or indirect entity references
8383 * to external entities.
8384 *
8385 * [ WFC: No < in Attribute Values ]
8386 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008387 * an attribute value (other than "&lt;") must not contain a <.
8388 *
Owen Taylor3473f882001-02-23 17:55:21 +00008389 * [ VC: Attribute Value Type ]
8390 * The attribute must have been declared; the value must be of the type
8391 * declared for it.
8392 *
8393 * [25] Eq ::= S? '=' S?
8394 *
8395 * With namespace:
8396 *
8397 * [NS 11] Attribute ::= QName Eq AttValue
8398 *
8399 * Also the case QName == xmlns:??? is handled independently as a namespace
8400 * definition.
8401 *
8402 * Returns the attribute name, and the value in *value.
8403 */
8404
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008405const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008406xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008407 const xmlChar *name;
8408 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008409
8410 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008411 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008412 name = xmlParseName(ctxt);
8413 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008414 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008415 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008416 return(NULL);
8417 }
8418
8419 /*
8420 * read the value
8421 */
8422 SKIP_BLANKS;
8423 if (RAW == '=') {
8424 NEXT;
8425 SKIP_BLANKS;
8426 val = xmlParseAttValue(ctxt);
8427 ctxt->instate = XML_PARSER_CONTENT;
8428 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008429 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008430 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008431 return(NULL);
8432 }
8433
8434 /*
8435 * Check that xml:lang conforms to the specification
8436 * No more registered as an error, just generate a warning now
8437 * since this was deprecated in XML second edition
8438 */
8439 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8440 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008441 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8442 "Malformed value for xml:lang : %s\n",
8443 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008444 }
8445 }
8446
8447 /*
8448 * Check that xml:space conforms to the specification
8449 */
8450 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8451 if (xmlStrEqual(val, BAD_CAST "default"))
8452 *(ctxt->space) = 0;
8453 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8454 *(ctxt->space) = 1;
8455 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008456 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008457"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008458 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008459 }
8460 }
8461
8462 *value = val;
8463 return(name);
8464}
8465
8466/**
8467 * xmlParseStartTag:
8468 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008469 *
Owen Taylor3473f882001-02-23 17:55:21 +00008470 * parse a start of tag either for rule element or
8471 * EmptyElement. In both case we don't parse the tag closing chars.
8472 *
8473 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8474 *
8475 * [ WFC: Unique Att Spec ]
8476 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008477 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008478 *
8479 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8480 *
8481 * [ WFC: Unique Att Spec ]
8482 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008483 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008484 *
8485 * With namespace:
8486 *
8487 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8488 *
8489 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8490 *
8491 * Returns the element name parsed
8492 */
8493
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008494const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008495xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008496 const xmlChar *name;
8497 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008498 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008499 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008500 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008501 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008502 int i;
8503
8504 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008505 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008506
8507 name = xmlParseName(ctxt);
8508 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008509 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008510 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008511 return(NULL);
8512 }
8513
8514 /*
8515 * Now parse the attributes, it ends up with the ending
8516 *
8517 * (S Attribute)* S?
8518 */
8519 SKIP_BLANKS;
8520 GROW;
8521
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008522 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008523 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008524 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008525 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008526 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008527
8528 attname = xmlParseAttribute(ctxt, &attvalue);
8529 if ((attname != NULL) && (attvalue != NULL)) {
8530 /*
8531 * [ WFC: Unique Att Spec ]
8532 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008533 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008534 */
8535 for (i = 0; i < nbatts;i += 2) {
8536 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008537 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008538 xmlFree(attvalue);
8539 goto failed;
8540 }
8541 }
Owen Taylor3473f882001-02-23 17:55:21 +00008542 /*
8543 * Add the pair to atts
8544 */
8545 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008546 maxatts = 22; /* allow for 10 attrs by default */
8547 atts = (const xmlChar **)
8548 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008549 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008550 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008551 if (attvalue != NULL)
8552 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008553 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008554 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008555 ctxt->atts = atts;
8556 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008557 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008558 const xmlChar **n;
8559
Owen Taylor3473f882001-02-23 17:55:21 +00008560 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008561 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008562 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008563 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008564 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008565 if (attvalue != NULL)
8566 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008567 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008568 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008569 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008570 ctxt->atts = atts;
8571 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008572 }
8573 atts[nbatts++] = attname;
8574 atts[nbatts++] = attvalue;
8575 atts[nbatts] = NULL;
8576 atts[nbatts + 1] = NULL;
8577 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008578 if (attvalue != NULL)
8579 xmlFree(attvalue);
8580 }
8581
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008582failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008583
Daniel Veillard3772de32002-12-17 10:31:45 +00008584 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008585 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8586 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008587 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008588 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8589 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008590 }
8591 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008592 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8593 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008594 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8595 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008596 break;
8597 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008598 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008599 GROW;
8600 }
8601
8602 /*
8603 * SAX: Start of Element !
8604 */
8605 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008606 (!ctxt->disableSAX)) {
8607 if (nbatts > 0)
8608 ctxt->sax->startElement(ctxt->userData, name, atts);
8609 else
8610 ctxt->sax->startElement(ctxt->userData, name, NULL);
8611 }
Owen Taylor3473f882001-02-23 17:55:21 +00008612
8613 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008614 /* Free only the content strings */
8615 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008616 if (atts[i] != NULL)
8617 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008618 }
8619 return(name);
8620}
8621
8622/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008623 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008624 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008625 * @line: line of the start tag
8626 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008627 *
8628 * parse an end of tag
8629 *
8630 * [42] ETag ::= '</' Name S? '>'
8631 *
8632 * With namespace
8633 *
8634 * [NS 9] ETag ::= '</' QName S? '>'
8635 */
8636
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008637static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008638xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008639 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008640
8641 GROW;
8642 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008643 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008644 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008645 return;
8646 }
8647 SKIP(2);
8648
Daniel Veillard46de64e2002-05-29 08:21:33 +00008649 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008650
8651 /*
8652 * We should definitely be at the ending "S? '>'" part
8653 */
8654 GROW;
8655 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008656 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008657 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008658 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008659 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008660
8661 /*
8662 * [ WFC: Element Type Match ]
8663 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008664 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008665 *
8666 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008667 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008668 if (name == NULL) name = BAD_CAST "unparseable";
8669 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008670 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008671 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008672 }
8673
8674 /*
8675 * SAX: End of Tag
8676 */
8677 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8678 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008679 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008680
Daniel Veillarde57ec792003-09-10 10:50:59 +00008681 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008682 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008683 return;
8684}
8685
8686/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008687 * xmlParseEndTag:
8688 * @ctxt: an XML parser context
8689 *
8690 * parse an end of tag
8691 *
8692 * [42] ETag ::= '</' Name S? '>'
8693 *
8694 * With namespace
8695 *
8696 * [NS 9] ETag ::= '</' QName S? '>'
8697 */
8698
8699void
8700xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008701 xmlParseEndTag1(ctxt, 0);
8702}
Daniel Veillard81273902003-09-30 00:43:48 +00008703#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008704
8705/************************************************************************
8706 * *
8707 * SAX 2 specific operations *
8708 * *
8709 ************************************************************************/
8710
Daniel Veillard0fb18932003-09-07 09:14:37 +00008711/*
8712 * xmlGetNamespace:
8713 * @ctxt: an XML parser context
8714 * @prefix: the prefix to lookup
8715 *
8716 * Lookup the namespace name for the @prefix (which ca be NULL)
8717 * The prefix must come from the @ctxt->dict dictionnary
8718 *
8719 * Returns the namespace name or NULL if not bound
8720 */
8721static const xmlChar *
8722xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8723 int i;
8724
Daniel Veillarde57ec792003-09-10 10:50:59 +00008725 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008726 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008727 if (ctxt->nsTab[i] == prefix) {
8728 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8729 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008731 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008732 return(NULL);
8733}
8734
8735/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736 * xmlParseQName:
8737 * @ctxt: an XML parser context
8738 * @prefix: pointer to store the prefix part
8739 *
8740 * parse an XML Namespace QName
8741 *
8742 * [6] QName ::= (Prefix ':')? LocalPart
8743 * [7] Prefix ::= NCName
8744 * [8] LocalPart ::= NCName
8745 *
8746 * Returns the Name parsed or NULL
8747 */
8748
8749static const xmlChar *
8750xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8751 const xmlChar *l, *p;
8752
8753 GROW;
8754
8755 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008756 if (l == NULL) {
8757 if (CUR == ':') {
8758 l = xmlParseName(ctxt);
8759 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008760 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008761 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008762 *prefix = NULL;
8763 return(l);
8764 }
8765 }
8766 return(NULL);
8767 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008768 if (CUR == ':') {
8769 NEXT;
8770 p = l;
8771 l = xmlParseNCName(ctxt);
8772 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008773 xmlChar *tmp;
8774
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008775 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8776 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008777 l = xmlParseNmtoken(ctxt);
8778 if (l == NULL)
8779 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8780 else {
8781 tmp = xmlBuildQName(l, p, NULL, 0);
8782 xmlFree((char *)l);
8783 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008784 p = xmlDictLookup(ctxt->dict, tmp, -1);
8785 if (tmp != NULL) xmlFree(tmp);
8786 *prefix = NULL;
8787 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008788 }
8789 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008790 xmlChar *tmp;
8791
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008792 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8793 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008794 NEXT;
8795 tmp = (xmlChar *) xmlParseName(ctxt);
8796 if (tmp != NULL) {
8797 tmp = xmlBuildQName(tmp, l, NULL, 0);
8798 l = xmlDictLookup(ctxt->dict, tmp, -1);
8799 if (tmp != NULL) xmlFree(tmp);
8800 *prefix = p;
8801 return(l);
8802 }
8803 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8804 l = xmlDictLookup(ctxt->dict, tmp, -1);
8805 if (tmp != NULL) xmlFree(tmp);
8806 *prefix = p;
8807 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008808 }
8809 *prefix = p;
8810 } else
8811 *prefix = NULL;
8812 return(l);
8813}
8814
8815/**
8816 * xmlParseQNameAndCompare:
8817 * @ctxt: an XML parser context
8818 * @name: the localname
8819 * @prefix: the prefix, if any.
8820 *
8821 * parse an XML name and compares for match
8822 * (specialized for endtag parsing)
8823 *
8824 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8825 * and the name for mismatch
8826 */
8827
8828static const xmlChar *
8829xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8830 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008831 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832 const xmlChar *in;
8833 const xmlChar *ret;
8834 const xmlChar *prefix2;
8835
8836 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8837
8838 GROW;
8839 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008840
Daniel Veillard0fb18932003-09-07 09:14:37 +00008841 cmp = prefix;
8842 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008843 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008844 ++cmp;
8845 }
8846 if ((*cmp == 0) && (*in == ':')) {
8847 in++;
8848 cmp = name;
8849 while (*in != 0 && *in == *cmp) {
8850 ++in;
8851 ++cmp;
8852 }
William M. Brack76e95df2003-10-18 16:20:14 +00008853 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008854 /* success */
8855 ctxt->input->cur = in;
8856 return((const xmlChar*) 1);
8857 }
8858 }
8859 /*
8860 * all strings coms from the dictionary, equality can be done directly
8861 */
8862 ret = xmlParseQName (ctxt, &prefix2);
8863 if ((ret == name) && (prefix == prefix2))
8864 return((const xmlChar*) 1);
8865 return ret;
8866}
8867
8868/**
8869 * xmlParseAttValueInternal:
8870 * @ctxt: an XML parser context
8871 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008872 * @alloc: whether the attribute was reallocated as a new string
8873 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008874 *
8875 * parse a value for an attribute.
8876 * NOTE: if no normalization is needed, the routine will return pointers
8877 * directly from the data buffer.
8878 *
8879 * 3.3.3 Attribute-Value Normalization:
8880 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008881 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008882 * - a character reference is processed by appending the referenced
8883 * character to the attribute value
8884 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008885 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008886 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8887 * appending #x20 to the normalized value, except that only a single
8888 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008889 * parsed entity or the literal entity value of an internal parsed entity
8890 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008891 * If the declared value is not CDATA, then the XML processor must further
8892 * process the normalized attribute value by discarding any leading and
8893 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008894 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008895 * All attributes for which no declaration has been read should be treated
8896 * by a non-validating parser as if declared CDATA.
8897 *
8898 * Returns the AttValue parsed or NULL. The value has to be freed by the
8899 * caller if it was copied, this can be detected by val[*len] == 0.
8900 */
8901
8902static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008903xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8904 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008905{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008906 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008907 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008908 xmlChar *ret = NULL;
8909
8910 GROW;
8911 in = (xmlChar *) CUR_PTR;
8912 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008913 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008914 return (NULL);
8915 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008916 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008918 /*
8919 * try to handle in this routine the most common case where no
8920 * allocation of a new string is required and where content is
8921 * pure ASCII.
8922 */
8923 limit = *in++;
8924 end = ctxt->input->end;
8925 start = in;
8926 if (in >= end) {
8927 const xmlChar *oldbase = ctxt->input->base;
8928 GROW;
8929 if (oldbase != ctxt->input->base) {
8930 long delta = ctxt->input->base - oldbase;
8931 start = start + delta;
8932 in = in + delta;
8933 }
8934 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008935 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008936 if (normalize) {
8937 /*
8938 * Skip any leading spaces
8939 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008940 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008941 ((*in == 0x20) || (*in == 0x9) ||
8942 (*in == 0xA) || (*in == 0xD))) {
8943 in++;
8944 start = in;
8945 if (in >= end) {
8946 const xmlChar *oldbase = ctxt->input->base;
8947 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008948 if (ctxt->instate == XML_PARSER_EOF)
8949 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008950 if (oldbase != ctxt->input->base) {
8951 long delta = ctxt->input->base - oldbase;
8952 start = start + delta;
8953 in = in + delta;
8954 }
8955 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008956 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8957 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8958 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008959 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008960 return(NULL);
8961 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008962 }
8963 }
8964 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8965 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8966 if ((*in++ == 0x20) && (*in == 0x20)) break;
8967 if (in >= end) {
8968 const xmlChar *oldbase = ctxt->input->base;
8969 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008970 if (ctxt->instate == XML_PARSER_EOF)
8971 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008972 if (oldbase != ctxt->input->base) {
8973 long delta = ctxt->input->base - oldbase;
8974 start = start + delta;
8975 in = in + delta;
8976 }
8977 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008978 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8979 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8980 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008981 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008982 return(NULL);
8983 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008984 }
8985 }
8986 last = in;
8987 /*
8988 * skip the trailing blanks
8989 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008990 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008991 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008992 ((*in == 0x20) || (*in == 0x9) ||
8993 (*in == 0xA) || (*in == 0xD))) {
8994 in++;
8995 if (in >= end) {
8996 const xmlChar *oldbase = ctxt->input->base;
8997 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008998 if (ctxt->instate == XML_PARSER_EOF)
8999 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009000 if (oldbase != ctxt->input->base) {
9001 long delta = ctxt->input->base - oldbase;
9002 start = start + delta;
9003 in = in + delta;
9004 last = last + delta;
9005 }
9006 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009007 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9008 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9009 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009010 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009011 return(NULL);
9012 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009013 }
9014 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009015 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9016 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9017 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009018 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009019 return(NULL);
9020 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009021 if (*in != limit) goto need_complex;
9022 } else {
9023 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9024 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9025 in++;
9026 if (in >= end) {
9027 const xmlChar *oldbase = ctxt->input->base;
9028 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009029 if (ctxt->instate == XML_PARSER_EOF)
9030 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009031 if (oldbase != ctxt->input->base) {
9032 long delta = ctxt->input->base - oldbase;
9033 start = start + delta;
9034 in = in + delta;
9035 }
9036 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009037 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9038 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9039 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009040 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009041 return(NULL);
9042 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009043 }
9044 }
9045 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009046 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9047 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9048 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009049 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009050 return(NULL);
9051 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009052 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009053 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009054 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009055 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009056 *len = last - start;
9057 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009058 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009059 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009060 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009061 }
9062 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009063 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009064 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009065need_complex:
9066 if (alloc) *alloc = 1;
9067 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009068}
9069
9070/**
9071 * xmlParseAttribute2:
9072 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009073 * @pref: the element prefix
9074 * @elem: the element name
9075 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009076 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009077 * @len: an int * to save the length of the attribute
9078 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009079 *
9080 * parse an attribute in the new SAX2 framework.
9081 *
9082 * Returns the attribute name, and the value in *value, .
9083 */
9084
9085static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009086xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009087 const xmlChar * pref, const xmlChar * elem,
9088 const xmlChar ** prefix, xmlChar ** value,
9089 int *len, int *alloc)
9090{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009091 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009092 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009093 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009094
9095 *value = NULL;
9096 GROW;
9097 name = xmlParseQName(ctxt, prefix);
9098 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009099 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9100 "error parsing attribute name\n");
9101 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009102 }
9103
9104 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009105 * get the type if needed
9106 */
9107 if (ctxt->attsSpecial != NULL) {
9108 int type;
9109
9110 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009111 pref, elem, *prefix, name);
9112 if (type != 0)
9113 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009114 }
9115
9116 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009117 * read the value
9118 */
9119 SKIP_BLANKS;
9120 if (RAW == '=') {
9121 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009122 SKIP_BLANKS;
9123 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9124 if (normalize) {
9125 /*
9126 * Sometimes a second normalisation pass for spaces is needed
9127 * but that only happens if charrefs or entities refernces
9128 * have been used in the attribute value, i.e. the attribute
9129 * value have been extracted in an allocated string already.
9130 */
9131 if (*alloc) {
9132 const xmlChar *val2;
9133
9134 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009135 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009136 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009137 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009138 }
9139 }
9140 }
9141 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009142 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009143 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9144 "Specification mandate value for attribute %s\n",
9145 name);
9146 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009147 }
9148
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009149 if (*prefix == ctxt->str_xml) {
9150 /*
9151 * Check that xml:lang conforms to the specification
9152 * No more registered as an error, just generate a warning now
9153 * since this was deprecated in XML second edition
9154 */
9155 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9156 internal_val = xmlStrndup(val, *len);
9157 if (!xmlCheckLanguageID(internal_val)) {
9158 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9159 "Malformed value for xml:lang : %s\n",
9160 internal_val, NULL);
9161 }
9162 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009163
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009164 /*
9165 * Check that xml:space conforms to the specification
9166 */
9167 if (xmlStrEqual(name, BAD_CAST "space")) {
9168 internal_val = xmlStrndup(val, *len);
9169 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9170 *(ctxt->space) = 0;
9171 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9172 *(ctxt->space) = 1;
9173 else {
9174 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9175 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9176 internal_val, NULL);
9177 }
9178 }
9179 if (internal_val) {
9180 xmlFree(internal_val);
9181 }
9182 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009183
9184 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009185 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009186}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009187/**
9188 * xmlParseStartTag2:
9189 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009190 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009191 * parse a start of tag either for rule element or
9192 * EmptyElement. In both case we don't parse the tag closing chars.
9193 * This routine is called when running SAX2 parsing
9194 *
9195 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9196 *
9197 * [ WFC: Unique Att Spec ]
9198 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009199 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009200 *
9201 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9202 *
9203 * [ WFC: Unique Att Spec ]
9204 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009205 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009206 *
9207 * With namespace:
9208 *
9209 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9210 *
9211 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9212 *
9213 * Returns the element name parsed
9214 */
9215
9216static const xmlChar *
9217xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009218 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009219 const xmlChar *localname;
9220 const xmlChar *prefix;
9221 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009222 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009223 const xmlChar *nsname;
9224 xmlChar *attvalue;
9225 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009226 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009227 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009228 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009229 const xmlChar *base;
9230 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009231 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009232
9233 if (RAW != '<') return(NULL);
9234 NEXT1;
9235
9236 /*
9237 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9238 * point since the attribute values may be stored as pointers to
9239 * the buffer and calling SHRINK would destroy them !
9240 * The Shrinking is only possible once the full set of attribute
9241 * callbacks have been done.
9242 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009243reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009244 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009245 base = ctxt->input->base;
9246 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009247 oldline = ctxt->input->line;
9248 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009249 nbatts = 0;
9250 nratts = 0;
9251 nbdef = 0;
9252 nbNs = 0;
9253 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009254 /* Forget any namespaces added during an earlier parse of this element. */
9255 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009256
9257 localname = xmlParseQName(ctxt, &prefix);
9258 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009259 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9260 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009261 return(NULL);
9262 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009263 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009264
9265 /*
9266 * Now parse the attributes, it ends up with the ending
9267 *
9268 * (S Attribute)* S?
9269 */
9270 SKIP_BLANKS;
9271 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009272 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009273
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009274 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009275 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009276 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009277 const xmlChar *q = CUR_PTR;
9278 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009279 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009280
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009281 attname = xmlParseAttribute2(ctxt, prefix, localname,
9282 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009283 if (ctxt->input->base != base) {
9284 if ((attvalue != NULL) && (alloc != 0))
9285 xmlFree(attvalue);
9286 attvalue = NULL;
9287 goto base_changed;
9288 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009289 if ((attname != NULL) && (attvalue != NULL)) {
9290 if (len < 0) len = xmlStrlen(attvalue);
9291 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009292 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9293 xmlURIPtr uri;
9294
9295 if (*URL != 0) {
9296 uri = xmlParseURI((const char *) URL);
9297 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009298 xmlNsErr(ctxt, XML_WAR_NS_URI,
9299 "xmlns: '%s' is not a valid URI\n",
9300 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009301 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009302 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009303 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9304 "xmlns: URI %s is not absolute\n",
9305 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009306 }
9307 xmlFreeURI(uri);
9308 }
Daniel Veillard37334572008-07-31 08:20:02 +00009309 if (URL == ctxt->str_xml_ns) {
9310 if (attname != ctxt->str_xml) {
9311 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9312 "xml namespace URI cannot be the default namespace\n",
9313 NULL, NULL, NULL);
9314 }
9315 goto skip_default_ns;
9316 }
9317 if ((len == 29) &&
9318 (xmlStrEqual(URL,
9319 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9320 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9321 "reuse of the xmlns namespace name is forbidden\n",
9322 NULL, NULL, NULL);
9323 goto skip_default_ns;
9324 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009325 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009326 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009327 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009328 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009329 for (j = 1;j <= nbNs;j++)
9330 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9331 break;
9332 if (j <= nbNs)
9333 xmlErrAttributeDup(ctxt, NULL, attname);
9334 else
9335 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009336skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009337 if (alloc != 0) xmlFree(attvalue);
9338 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009339 continue;
9340 }
9341 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009342 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9343 xmlURIPtr uri;
9344
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009345 if (attname == ctxt->str_xml) {
9346 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009347 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9348 "xml namespace prefix mapped to wrong URI\n",
9349 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009350 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009351 /*
9352 * Do not keep a namespace definition node
9353 */
Daniel Veillard37334572008-07-31 08:20:02 +00009354 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009355 }
Daniel Veillard37334572008-07-31 08:20:02 +00009356 if (URL == ctxt->str_xml_ns) {
9357 if (attname != ctxt->str_xml) {
9358 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9359 "xml namespace URI mapped to wrong prefix\n",
9360 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009361 }
Daniel Veillard37334572008-07-31 08:20:02 +00009362 goto skip_ns;
9363 }
9364 if (attname == ctxt->str_xmlns) {
9365 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9366 "redefinition of the xmlns prefix is forbidden\n",
9367 NULL, NULL, NULL);
9368 goto skip_ns;
9369 }
9370 if ((len == 29) &&
9371 (xmlStrEqual(URL,
9372 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9373 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9374 "reuse of the xmlns namespace name is forbidden\n",
9375 NULL, NULL, NULL);
9376 goto skip_ns;
9377 }
9378 if ((URL == NULL) || (URL[0] == 0)) {
9379 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9380 "xmlns:%s: Empty XML namespace is not allowed\n",
9381 attname, NULL, NULL);
9382 goto skip_ns;
9383 } else {
9384 uri = xmlParseURI((const char *) URL);
9385 if (uri == NULL) {
9386 xmlNsErr(ctxt, XML_WAR_NS_URI,
9387 "xmlns:%s: '%s' is not a valid URI\n",
9388 attname, URL, NULL);
9389 } else {
9390 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9391 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9392 "xmlns:%s: URI %s is not absolute\n",
9393 attname, URL, NULL);
9394 }
9395 xmlFreeURI(uri);
9396 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009397 }
9398
Daniel Veillard0fb18932003-09-07 09:14:37 +00009399 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009400 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009401 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009402 for (j = 1;j <= nbNs;j++)
9403 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9404 break;
9405 if (j <= nbNs)
9406 xmlErrAttributeDup(ctxt, aprefix, attname);
9407 else
9408 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009409skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009410 if (alloc != 0) xmlFree(attvalue);
9411 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009412 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009413 continue;
9414 }
9415
9416 /*
9417 * Add the pair to atts
9418 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009419 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9420 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009421 if (attvalue[len] == 0)
9422 xmlFree(attvalue);
9423 goto failed;
9424 }
9425 maxatts = ctxt->maxatts;
9426 atts = ctxt->atts;
9427 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009428 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009429 atts[nbatts++] = attname;
9430 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009431 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009432 atts[nbatts++] = attvalue;
9433 attvalue += len;
9434 atts[nbatts++] = attvalue;
9435 /*
9436 * tag if some deallocation is needed
9437 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009438 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009439 } else {
9440 if ((attvalue != NULL) && (attvalue[len] == 0))
9441 xmlFree(attvalue);
9442 }
9443
Daniel Veillard37334572008-07-31 08:20:02 +00009444failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009445
9446 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009447 if (ctxt->instate == XML_PARSER_EOF)
9448 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009449 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009450 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9451 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009452 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9454 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009455 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009456 }
9457 SKIP_BLANKS;
9458 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9459 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009460 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009461 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009462 break;
9463 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009464 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009465 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009466 }
9467
Daniel Veillard0fb18932003-09-07 09:14:37 +00009468 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009469 * The attributes defaulting
9470 */
9471 if (ctxt->attsDefault != NULL) {
9472 xmlDefAttrsPtr defaults;
9473
9474 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9475 if (defaults != NULL) {
9476 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009477 attname = defaults->values[5 * i];
9478 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009479
9480 /*
9481 * special work for namespaces defaulted defs
9482 */
9483 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9484 /*
9485 * check that it's not a defined namespace
9486 */
9487 for (j = 1;j <= nbNs;j++)
9488 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9489 break;
9490 if (j <= nbNs) continue;
9491
9492 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009493 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009494 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009495 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009496 nbNs++;
9497 }
9498 } else if (aprefix == ctxt->str_xmlns) {
9499 /*
9500 * check that it's not a defined namespace
9501 */
9502 for (j = 1;j <= nbNs;j++)
9503 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9504 break;
9505 if (j <= nbNs) continue;
9506
9507 nsname = xmlGetNamespace(ctxt, attname);
9508 if (nsname != defaults->values[2]) {
9509 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009510 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009511 nbNs++;
9512 }
9513 } else {
9514 /*
9515 * check that it's not a defined attribute
9516 */
9517 for (j = 0;j < nbatts;j+=5) {
9518 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9519 break;
9520 }
9521 if (j < nbatts) continue;
9522
9523 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9524 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009525 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009526 }
9527 maxatts = ctxt->maxatts;
9528 atts = ctxt->atts;
9529 }
9530 atts[nbatts++] = attname;
9531 atts[nbatts++] = aprefix;
9532 if (aprefix == NULL)
9533 atts[nbatts++] = NULL;
9534 else
9535 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009536 atts[nbatts++] = defaults->values[5 * i + 2];
9537 atts[nbatts++] = defaults->values[5 * i + 3];
9538 if ((ctxt->standalone == 1) &&
9539 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009540 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009541 "standalone: attribute %s on %s defaulted from external subset\n",
9542 attname, localname);
9543 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009544 nbdef++;
9545 }
9546 }
9547 }
9548 }
9549
Daniel Veillarde70c8772003-11-25 07:21:18 +00009550 /*
9551 * The attributes checkings
9552 */
9553 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009554 /*
9555 * The default namespace does not apply to attribute names.
9556 */
9557 if (atts[i + 1] != NULL) {
9558 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9559 if (nsname == NULL) {
9560 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9561 "Namespace prefix %s for %s on %s is not defined\n",
9562 atts[i + 1], atts[i], localname);
9563 }
9564 atts[i + 2] = nsname;
9565 } else
9566 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009567 /*
9568 * [ WFC: Unique Att Spec ]
9569 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009570 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009571 * As extended by the Namespace in XML REC.
9572 */
9573 for (j = 0; j < i;j += 5) {
9574 if (atts[i] == atts[j]) {
9575 if (atts[i+1] == atts[j+1]) {
9576 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9577 break;
9578 }
9579 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9580 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9581 "Namespaced Attribute %s in '%s' redefined\n",
9582 atts[i], nsname, NULL);
9583 break;
9584 }
9585 }
9586 }
9587 }
9588
Daniel Veillarde57ec792003-09-10 10:50:59 +00009589 nsname = xmlGetNamespace(ctxt, prefix);
9590 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009591 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9592 "Namespace prefix %s on %s is not defined\n",
9593 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009594 }
9595 *pref = prefix;
9596 *URI = nsname;
9597
9598 /*
9599 * SAX: Start of Element !
9600 */
9601 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9602 (!ctxt->disableSAX)) {
9603 if (nbNs > 0)
9604 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9605 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9606 nbatts / 5, nbdef, atts);
9607 else
9608 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9609 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9610 }
9611
9612 /*
9613 * Free up attribute allocated strings if needed
9614 */
9615 if (attval != 0) {
9616 for (i = 3,j = 0; j < nratts;i += 5,j++)
9617 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9618 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009619 }
9620
9621 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009622
9623base_changed:
9624 /*
9625 * the attribute strings are valid iif the base didn't changed
9626 */
9627 if (attval != 0) {
9628 for (i = 3,j = 0; j < nratts;i += 5,j++)
9629 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9630 xmlFree((xmlChar *) atts[i]);
9631 }
9632 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009633 ctxt->input->line = oldline;
9634 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009635 if (ctxt->wellFormed == 1) {
9636 goto reparse;
9637 }
9638 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009639}
9640
9641/**
9642 * xmlParseEndTag2:
9643 * @ctxt: an XML parser context
9644 * @line: line of the start tag
9645 * @nsNr: number of namespaces on the start tag
9646 *
9647 * parse an end of tag
9648 *
9649 * [42] ETag ::= '</' Name S? '>'
9650 *
9651 * With namespace
9652 *
9653 * [NS 9] ETag ::= '</' QName S? '>'
9654 */
9655
9656static void
9657xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009658 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009659 const xmlChar *name;
9660
9661 GROW;
9662 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009663 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009664 return;
9665 }
9666 SKIP(2);
9667
William M. Brack13dfa872004-09-18 04:52:08 +00009668 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009669 if (ctxt->input->cur[tlen] == '>') {
9670 ctxt->input->cur += tlen + 1;
9671 goto done;
9672 }
9673 ctxt->input->cur += tlen;
9674 name = (xmlChar*)1;
9675 } else {
9676 if (prefix == NULL)
9677 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9678 else
9679 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9680 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009681
9682 /*
9683 * We should definitely be at the ending "S? '>'" part
9684 */
9685 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009686 if (ctxt->instate == XML_PARSER_EOF)
9687 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009688 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009689 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009690 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009691 } else
9692 NEXT1;
9693
9694 /*
9695 * [ WFC: Element Type Match ]
9696 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009697 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009698 *
9699 */
9700 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009701 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009702 if ((line == 0) && (ctxt->node != NULL))
9703 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009704 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009705 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009706 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009707 }
9708
9709 /*
9710 * SAX: End of Tag
9711 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009712done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009713 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9714 (!ctxt->disableSAX))
9715 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9716
Daniel Veillard0fb18932003-09-07 09:14:37 +00009717 spacePop(ctxt);
9718 if (nsNr != 0)
9719 nsPop(ctxt, nsNr);
9720 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009721}
9722
9723/**
Owen Taylor3473f882001-02-23 17:55:21 +00009724 * xmlParseCDSect:
9725 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009726 *
Owen Taylor3473f882001-02-23 17:55:21 +00009727 * Parse escaped pure raw content.
9728 *
9729 * [18] CDSect ::= CDStart CData CDEnd
9730 *
9731 * [19] CDStart ::= '<![CDATA['
9732 *
9733 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9734 *
9735 * [21] CDEnd ::= ']]>'
9736 */
9737void
9738xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9739 xmlChar *buf = NULL;
9740 int len = 0;
9741 int size = XML_PARSER_BUFFER_SIZE;
9742 int r, rl;
9743 int s, sl;
9744 int cur, l;
9745 int count = 0;
9746
Daniel Veillard8f597c32003-10-06 08:19:27 +00009747 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009748 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009749 SKIP(9);
9750 } else
9751 return;
9752
9753 ctxt->instate = XML_PARSER_CDATA_SECTION;
9754 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009755 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009756 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009757 ctxt->instate = XML_PARSER_CONTENT;
9758 return;
9759 }
9760 NEXTL(rl);
9761 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009762 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009763 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009764 ctxt->instate = XML_PARSER_CONTENT;
9765 return;
9766 }
9767 NEXTL(sl);
9768 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009769 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009770 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009771 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009772 return;
9773 }
William M. Brack871611b2003-10-18 04:53:14 +00009774 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009775 ((r != ']') || (s != ']') || (cur != '>'))) {
9776 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009777 xmlChar *tmp;
9778
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009779 if ((size > XML_MAX_TEXT_LENGTH) &&
9780 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9781 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9782 "CData section too big found", NULL);
9783 xmlFree (buf);
9784 return;
9785 }
9786 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009787 if (tmp == NULL) {
9788 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009789 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009790 return;
9791 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009792 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009793 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009794 }
9795 COPY_BUF(rl,buf,len,r);
9796 r = s;
9797 rl = sl;
9798 s = cur;
9799 sl = l;
9800 count++;
9801 if (count > 50) {
9802 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009803 if (ctxt->instate == XML_PARSER_EOF) {
9804 xmlFree(buf);
9805 return;
9806 }
Owen Taylor3473f882001-02-23 17:55:21 +00009807 count = 0;
9808 }
9809 NEXTL(l);
9810 cur = CUR_CHAR(l);
9811 }
9812 buf[len] = 0;
9813 ctxt->instate = XML_PARSER_CONTENT;
9814 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009815 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009816 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009817 xmlFree(buf);
9818 return;
9819 }
9820 NEXTL(l);
9821
9822 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009823 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009824 */
9825 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9826 if (ctxt->sax->cdataBlock != NULL)
9827 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009828 else if (ctxt->sax->characters != NULL)
9829 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009830 }
9831 xmlFree(buf);
9832}
9833
9834/**
9835 * xmlParseContent:
9836 * @ctxt: an XML parser context
9837 *
9838 * Parse a content:
9839 *
9840 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9841 */
9842
9843void
9844xmlParseContent(xmlParserCtxtPtr ctxt) {
9845 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009846 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009847 ((RAW != '<') || (NXT(1) != '/')) &&
9848 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009849 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009850 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009851 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009852
9853 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009854 * First case : a Processing Instruction.
9855 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009856 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009857 xmlParsePI(ctxt);
9858 }
9859
9860 /*
9861 * Second case : a CDSection
9862 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009863 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009864 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009865 xmlParseCDSect(ctxt);
9866 }
9867
9868 /*
9869 * Third case : a comment
9870 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009871 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009872 (NXT(2) == '-') && (NXT(3) == '-')) {
9873 xmlParseComment(ctxt);
9874 ctxt->instate = XML_PARSER_CONTENT;
9875 }
9876
9877 /*
9878 * Fourth case : a sub-element.
9879 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009880 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009881 xmlParseElement(ctxt);
9882 }
9883
9884 /*
9885 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009886 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009887 */
9888
Daniel Veillard21a0f912001-02-25 19:54:14 +00009889 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009890 xmlParseReference(ctxt);
9891 }
9892
9893 /*
9894 * Last case, text. Note that References are handled directly.
9895 */
9896 else {
9897 xmlParseCharData(ctxt, 0);
9898 }
9899
9900 GROW;
9901 /*
9902 * Pop-up of finished entities.
9903 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009904 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009905 xmlPopInput(ctxt);
9906 SHRINK;
9907
Daniel Veillardfdc91562002-07-01 21:52:03 +00009908 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009909 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9910 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009911 ctxt->instate = XML_PARSER_EOF;
9912 break;
9913 }
9914 }
9915}
9916
9917/**
9918 * xmlParseElement:
9919 * @ctxt: an XML parser context
9920 *
9921 * parse an XML element, this is highly recursive
9922 *
9923 * [39] element ::= EmptyElemTag | STag content ETag
9924 *
9925 * [ WFC: Element Type Match ]
9926 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009927 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009928 *
Owen Taylor3473f882001-02-23 17:55:21 +00009929 */
9930
9931void
9932xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009933 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009934 const xmlChar *prefix = NULL;
9935 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009936 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009937 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009938 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009939 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009940
Daniel Veillard8915c152008-08-26 13:05:34 +00009941 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9942 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9943 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9944 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9945 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009946 ctxt->instate = XML_PARSER_EOF;
9947 return;
9948 }
9949
Owen Taylor3473f882001-02-23 17:55:21 +00009950 /* Capture start position */
9951 if (ctxt->record_info) {
9952 node_info.begin_pos = ctxt->input->consumed +
9953 (CUR_PTR - ctxt->input->base);
9954 node_info.begin_line = ctxt->input->line;
9955 }
9956
9957 if (ctxt->spaceNr == 0)
9958 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009959 else if (*ctxt->space == -2)
9960 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009961 else
9962 spacePush(ctxt, *ctxt->space);
9963
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009964 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009965#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009966 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009967#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009968 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009969#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009970 else
9971 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009972#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009973 if (ctxt->instate == XML_PARSER_EOF)
9974 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009975 if (name == NULL) {
9976 spacePop(ctxt);
9977 return;
9978 }
9979 namePush(ctxt, name);
9980 ret = ctxt->node;
9981
Daniel Veillard4432df22003-09-28 18:58:27 +00009982#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009983 /*
9984 * [ VC: Root Element Type ]
9985 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009986 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009987 */
9988 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9989 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9990 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009991#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009992
9993 /*
9994 * Check for an Empty Element.
9995 */
9996 if ((RAW == '/') && (NXT(1) == '>')) {
9997 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009998 if (ctxt->sax2) {
9999 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10000 (!ctxt->disableSAX))
10001 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010002#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010003 } else {
10004 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10005 (!ctxt->disableSAX))
10006 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010007#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010008 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010009 namePop(ctxt);
10010 spacePop(ctxt);
10011 if (nsNr != ctxt->nsNr)
10012 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010013 if ( ret != NULL && ctxt->record_info ) {
10014 node_info.end_pos = ctxt->input->consumed +
10015 (CUR_PTR - ctxt->input->base);
10016 node_info.end_line = ctxt->input->line;
10017 node_info.node = ret;
10018 xmlParserAddNodeInfo(ctxt, &node_info);
10019 }
10020 return;
10021 }
10022 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010023 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010024 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010025 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10026 "Couldn't find end of Start Tag %s line %d\n",
10027 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010028
10029 /*
10030 * end of parsing of this node.
10031 */
10032 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010033 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010034 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010035 if (nsNr != ctxt->nsNr)
10036 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010037
10038 /*
10039 * Capture end position and add node
10040 */
10041 if ( ret != NULL && ctxt->record_info ) {
10042 node_info.end_pos = ctxt->input->consumed +
10043 (CUR_PTR - ctxt->input->base);
10044 node_info.end_line = ctxt->input->line;
10045 node_info.node = ret;
10046 xmlParserAddNodeInfo(ctxt, &node_info);
10047 }
10048 return;
10049 }
10050
10051 /*
10052 * Parse the content of the element:
10053 */
10054 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010055 if (ctxt->instate == XML_PARSER_EOF)
10056 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010057 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010058 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010059 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010060 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010061
10062 /*
10063 * end of parsing of this node.
10064 */
10065 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010066 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010067 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010068 if (nsNr != ctxt->nsNr)
10069 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010070 return;
10071 }
10072
10073 /*
10074 * parse the end of tag: '</' should be here.
10075 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010076 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010077 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010078 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010079 }
10080#ifdef LIBXML_SAX1_ENABLED
10081 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010082 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010083#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010084
10085 /*
10086 * Capture end position and add node
10087 */
10088 if ( ret != NULL && ctxt->record_info ) {
10089 node_info.end_pos = ctxt->input->consumed +
10090 (CUR_PTR - ctxt->input->base);
10091 node_info.end_line = ctxt->input->line;
10092 node_info.node = ret;
10093 xmlParserAddNodeInfo(ctxt, &node_info);
10094 }
10095}
10096
10097/**
10098 * xmlParseVersionNum:
10099 * @ctxt: an XML parser context
10100 *
10101 * parse the XML version value.
10102 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010103 * [26] VersionNum ::= '1.' [0-9]+
10104 *
10105 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010106 *
10107 * Returns the string giving the XML version number, or NULL
10108 */
10109xmlChar *
10110xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10111 xmlChar *buf = NULL;
10112 int len = 0;
10113 int size = 10;
10114 xmlChar cur;
10115
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010116 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010117 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010118 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010119 return(NULL);
10120 }
10121 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010122 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010123 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010124 return(NULL);
10125 }
10126 buf[len++] = cur;
10127 NEXT;
10128 cur=CUR;
10129 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010130 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010131 return(NULL);
10132 }
10133 buf[len++] = cur;
10134 NEXT;
10135 cur=CUR;
10136 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010137 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010138 xmlChar *tmp;
10139
Owen Taylor3473f882001-02-23 17:55:21 +000010140 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010141 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10142 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010143 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010144 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010145 return(NULL);
10146 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010147 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010148 }
10149 buf[len++] = cur;
10150 NEXT;
10151 cur=CUR;
10152 }
10153 buf[len] = 0;
10154 return(buf);
10155}
10156
10157/**
10158 * xmlParseVersionInfo:
10159 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010160 *
Owen Taylor3473f882001-02-23 17:55:21 +000010161 * parse the XML version.
10162 *
10163 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010164 *
Owen Taylor3473f882001-02-23 17:55:21 +000010165 * [25] Eq ::= S? '=' S?
10166 *
10167 * Returns the version string, e.g. "1.0"
10168 */
10169
10170xmlChar *
10171xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10172 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010173
Daniel Veillarda07050d2003-10-19 14:46:32 +000010174 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010175 SKIP(7);
10176 SKIP_BLANKS;
10177 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010178 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010179 return(NULL);
10180 }
10181 NEXT;
10182 SKIP_BLANKS;
10183 if (RAW == '"') {
10184 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010185 version = xmlParseVersionNum(ctxt);
10186 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010187 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010188 } else
10189 NEXT;
10190 } else if (RAW == '\''){
10191 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010192 version = xmlParseVersionNum(ctxt);
10193 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010194 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010195 } else
10196 NEXT;
10197 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010198 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010199 }
10200 }
10201 return(version);
10202}
10203
10204/**
10205 * xmlParseEncName:
10206 * @ctxt: an XML parser context
10207 *
10208 * parse the XML encoding name
10209 *
10210 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10211 *
10212 * Returns the encoding name value or NULL
10213 */
10214xmlChar *
10215xmlParseEncName(xmlParserCtxtPtr ctxt) {
10216 xmlChar *buf = NULL;
10217 int len = 0;
10218 int size = 10;
10219 xmlChar cur;
10220
10221 cur = CUR;
10222 if (((cur >= 'a') && (cur <= 'z')) ||
10223 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010224 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010225 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010226 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010227 return(NULL);
10228 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010229
Owen Taylor3473f882001-02-23 17:55:21 +000010230 buf[len++] = cur;
10231 NEXT;
10232 cur = CUR;
10233 while (((cur >= 'a') && (cur <= 'z')) ||
10234 ((cur >= 'A') && (cur <= 'Z')) ||
10235 ((cur >= '0') && (cur <= '9')) ||
10236 (cur == '.') || (cur == '_') ||
10237 (cur == '-')) {
10238 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010239 xmlChar *tmp;
10240
Owen Taylor3473f882001-02-23 17:55:21 +000010241 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010242 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10243 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010244 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010245 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010246 return(NULL);
10247 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010248 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010249 }
10250 buf[len++] = cur;
10251 NEXT;
10252 cur = CUR;
10253 if (cur == 0) {
10254 SHRINK;
10255 GROW;
10256 cur = CUR;
10257 }
10258 }
10259 buf[len] = 0;
10260 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010261 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010262 }
10263 return(buf);
10264}
10265
10266/**
10267 * xmlParseEncodingDecl:
10268 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010269 *
Owen Taylor3473f882001-02-23 17:55:21 +000010270 * parse the XML encoding declaration
10271 *
10272 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10273 *
10274 * this setups the conversion filters.
10275 *
10276 * Returns the encoding value or NULL
10277 */
10278
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010279const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010280xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10281 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010282
10283 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010284 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010285 SKIP(8);
10286 SKIP_BLANKS;
10287 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010288 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010289 return(NULL);
10290 }
10291 NEXT;
10292 SKIP_BLANKS;
10293 if (RAW == '"') {
10294 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010295 encoding = xmlParseEncName(ctxt);
10296 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010297 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010298 } else
10299 NEXT;
10300 } else if (RAW == '\''){
10301 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010302 encoding = xmlParseEncName(ctxt);
10303 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010304 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010305 } else
10306 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010307 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010308 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010309 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010310
10311 /*
10312 * Non standard parsing, allowing the user to ignore encoding
10313 */
10314 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10315 return(encoding);
10316
Daniel Veillard6b621b82003-08-11 15:03:34 +000010317 /*
10318 * UTF-16 encoding stwich has already taken place at this stage,
10319 * more over the little-endian/big-endian selection is already done
10320 */
10321 if ((encoding != NULL) &&
10322 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10323 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010324 /*
10325 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010326 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010327 * document is apparently UTF-8 compatible, then raise an
10328 * encoding mismatch fatal error
10329 */
10330 if ((ctxt->encoding == NULL) &&
10331 (ctxt->input->buf != NULL) &&
10332 (ctxt->input->buf->encoder == NULL)) {
10333 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10334 "Document labelled UTF-16 but has UTF-8 content\n");
10335 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010336 if (ctxt->encoding != NULL)
10337 xmlFree((xmlChar *) ctxt->encoding);
10338 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010339 }
10340 /*
10341 * UTF-8 encoding is handled natively
10342 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010343 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010344 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10345 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010346 if (ctxt->encoding != NULL)
10347 xmlFree((xmlChar *) ctxt->encoding);
10348 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010349 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010350 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010351 xmlCharEncodingHandlerPtr handler;
10352
10353 if (ctxt->input->encoding != NULL)
10354 xmlFree((xmlChar *) ctxt->input->encoding);
10355 ctxt->input->encoding = encoding;
10356
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010357 handler = xmlFindCharEncodingHandler((const char *) encoding);
10358 if (handler != NULL) {
10359 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010360 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010362 "Unsupported encoding %s\n", encoding);
10363 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010364 }
10365 }
10366 }
10367 return(encoding);
10368}
10369
10370/**
10371 * xmlParseSDDecl:
10372 * @ctxt: an XML parser context
10373 *
10374 * parse the XML standalone declaration
10375 *
10376 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010377 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010378 *
10379 * [ VC: Standalone Document Declaration ]
10380 * TODO The standalone document declaration must have the value "no"
10381 * if any external markup declarations contain declarations of:
10382 * - attributes with default values, if elements to which these
10383 * attributes apply appear in the document without specifications
10384 * of values for these attributes, or
10385 * - entities (other than amp, lt, gt, apos, quot), if references
10386 * to those entities appear in the document, or
10387 * - attributes with values subject to normalization, where the
10388 * attribute appears in the document with a value which will change
10389 * as a result of normalization, or
10390 * - element types with element content, if white space occurs directly
10391 * within any instance of those types.
10392 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010393 * Returns:
10394 * 1 if standalone="yes"
10395 * 0 if standalone="no"
10396 * -2 if standalone attribute is missing or invalid
10397 * (A standalone value of -2 means that the XML declaration was found,
10398 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010399 */
10400
10401int
10402xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010403 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010404
10405 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010406 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010407 SKIP(10);
10408 SKIP_BLANKS;
10409 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010410 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010411 return(standalone);
10412 }
10413 NEXT;
10414 SKIP_BLANKS;
10415 if (RAW == '\''){
10416 NEXT;
10417 if ((RAW == 'n') && (NXT(1) == 'o')) {
10418 standalone = 0;
10419 SKIP(2);
10420 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10421 (NXT(2) == 's')) {
10422 standalone = 1;
10423 SKIP(3);
10424 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010425 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010426 }
10427 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010428 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010429 } else
10430 NEXT;
10431 } else if (RAW == '"'){
10432 NEXT;
10433 if ((RAW == 'n') && (NXT(1) == 'o')) {
10434 standalone = 0;
10435 SKIP(2);
10436 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10437 (NXT(2) == 's')) {
10438 standalone = 1;
10439 SKIP(3);
10440 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010441 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010442 }
10443 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010444 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010445 } else
10446 NEXT;
10447 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010448 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010449 }
10450 }
10451 return(standalone);
10452}
10453
10454/**
10455 * xmlParseXMLDecl:
10456 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010457 *
Owen Taylor3473f882001-02-23 17:55:21 +000010458 * parse an XML declaration header
10459 *
10460 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10461 */
10462
10463void
10464xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10465 xmlChar *version;
10466
10467 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010468 * This value for standalone indicates that the document has an
10469 * XML declaration but it does not have a standalone attribute.
10470 * It will be overwritten later if a standalone attribute is found.
10471 */
10472 ctxt->input->standalone = -2;
10473
10474 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010475 * We know that '<?xml' is here.
10476 */
10477 SKIP(5);
10478
William M. Brack76e95df2003-10-18 16:20:14 +000010479 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010480 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10481 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010482 }
10483 SKIP_BLANKS;
10484
10485 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010486 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010487 */
10488 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010489 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010490 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010491 } else {
10492 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10493 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010494 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010495 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010496 if (ctxt->options & XML_PARSE_OLD10) {
10497 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10498 "Unsupported version '%s'\n",
10499 version);
10500 } else {
10501 if ((version[0] == '1') && ((version[1] == '.'))) {
10502 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10503 "Unsupported version '%s'\n",
10504 version, NULL);
10505 } else {
10506 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10507 "Unsupported version '%s'\n",
10508 version);
10509 }
10510 }
Daniel Veillard19840942001-11-29 16:11:38 +000010511 }
10512 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010513 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010514 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010515 }
Owen Taylor3473f882001-02-23 17:55:21 +000010516
10517 /*
10518 * We may have the encoding declaration
10519 */
William M. Brack76e95df2003-10-18 16:20:14 +000010520 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010521 if ((RAW == '?') && (NXT(1) == '>')) {
10522 SKIP(2);
10523 return;
10524 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010525 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010526 }
10527 xmlParseEncodingDecl(ctxt);
10528 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10529 /*
10530 * The XML REC instructs us to stop parsing right here
10531 */
10532 return;
10533 }
10534
10535 /*
10536 * We may have the standalone status.
10537 */
William M. Brack76e95df2003-10-18 16:20:14 +000010538 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010539 if ((RAW == '?') && (NXT(1) == '>')) {
10540 SKIP(2);
10541 return;
10542 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010543 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010544 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010545
10546 /*
10547 * We can grow the input buffer freely at that point
10548 */
10549 GROW;
10550
Owen Taylor3473f882001-02-23 17:55:21 +000010551 SKIP_BLANKS;
10552 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10553
10554 SKIP_BLANKS;
10555 if ((RAW == '?') && (NXT(1) == '>')) {
10556 SKIP(2);
10557 } else if (RAW == '>') {
10558 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010559 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010560 NEXT;
10561 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010562 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010563 MOVETO_ENDTAG(CUR_PTR);
10564 NEXT;
10565 }
10566}
10567
10568/**
10569 * xmlParseMisc:
10570 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010571 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010572 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010573 *
10574 * [27] Misc ::= Comment | PI | S
10575 */
10576
10577void
10578xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010579 while ((ctxt->instate != XML_PARSER_EOF) &&
10580 (((RAW == '<') && (NXT(1) == '?')) ||
10581 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10582 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010583 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010584 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010585 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010586 NEXT;
10587 } else
10588 xmlParseComment(ctxt);
10589 }
10590}
10591
10592/**
10593 * xmlParseDocument:
10594 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010595 *
Owen Taylor3473f882001-02-23 17:55:21 +000010596 * parse an XML document (and build a tree if using the standard SAX
10597 * interface).
10598 *
10599 * [1] document ::= prolog element Misc*
10600 *
10601 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10602 *
10603 * Returns 0, -1 in case of error. the parser context is augmented
10604 * as a result of the parsing.
10605 */
10606
10607int
10608xmlParseDocument(xmlParserCtxtPtr ctxt) {
10609 xmlChar start[4];
10610 xmlCharEncoding enc;
10611
10612 xmlInitParser();
10613
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010614 if ((ctxt == NULL) || (ctxt->input == NULL))
10615 return(-1);
10616
Owen Taylor3473f882001-02-23 17:55:21 +000010617 GROW;
10618
10619 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010620 * SAX: detecting the level.
10621 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010622 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010623
10624 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010625 * SAX: beginning of the document processing.
10626 */
10627 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10628 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010629 if (ctxt->instate == XML_PARSER_EOF)
10630 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010631
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010632 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010633 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010634 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010635 * Get the 4 first bytes and decode the charset
10636 * if enc != XML_CHAR_ENCODING_NONE
10637 * plug some encoding conversion routines.
10638 */
10639 start[0] = RAW;
10640 start[1] = NXT(1);
10641 start[2] = NXT(2);
10642 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010643 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010644 if (enc != XML_CHAR_ENCODING_NONE) {
10645 xmlSwitchEncoding(ctxt, enc);
10646 }
Owen Taylor3473f882001-02-23 17:55:21 +000010647 }
10648
10649
10650 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010651 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010652 }
10653
10654 /*
10655 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010656 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010657 * than just the first line, unless the amount of data is really
10658 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010659 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010660 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10661 GROW;
10662 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010663 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010664
10665 /*
10666 * Note that we will switch encoding on the fly.
10667 */
10668 xmlParseXMLDecl(ctxt);
10669 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10670 /*
10671 * The XML REC instructs us to stop parsing right here
10672 */
10673 return(-1);
10674 }
10675 ctxt->standalone = ctxt->input->standalone;
10676 SKIP_BLANKS;
10677 } else {
10678 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10679 }
10680 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10681 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010682 if (ctxt->instate == XML_PARSER_EOF)
10683 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010684
10685 /*
10686 * The Misc part of the Prolog
10687 */
10688 GROW;
10689 xmlParseMisc(ctxt);
10690
10691 /*
10692 * Then possibly doc type declaration(s) and more Misc
10693 * (doctypedecl Misc*)?
10694 */
10695 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010696 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010697
10698 ctxt->inSubset = 1;
10699 xmlParseDocTypeDecl(ctxt);
10700 if (RAW == '[') {
10701 ctxt->instate = XML_PARSER_DTD;
10702 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010703 if (ctxt->instate == XML_PARSER_EOF)
10704 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010705 }
10706
10707 /*
10708 * Create and update the external subset.
10709 */
10710 ctxt->inSubset = 2;
10711 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10712 (!ctxt->disableSAX))
10713 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10714 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010715 if (ctxt->instate == XML_PARSER_EOF)
10716 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010717 ctxt->inSubset = 0;
10718
Daniel Veillardac4118d2008-01-11 05:27:32 +000010719 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010720
10721 ctxt->instate = XML_PARSER_PROLOG;
10722 xmlParseMisc(ctxt);
10723 }
10724
10725 /*
10726 * Time to start parsing the tree itself
10727 */
10728 GROW;
10729 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010730 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10731 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010732 } else {
10733 ctxt->instate = XML_PARSER_CONTENT;
10734 xmlParseElement(ctxt);
10735 ctxt->instate = XML_PARSER_EPILOG;
10736
10737
10738 /*
10739 * The Misc part at the end
10740 */
10741 xmlParseMisc(ctxt);
10742
Daniel Veillard561b7f82002-03-20 21:55:57 +000010743 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010744 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010745 }
10746 ctxt->instate = XML_PARSER_EOF;
10747 }
10748
10749 /*
10750 * SAX: end of the document processing.
10751 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010752 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010753 ctxt->sax->endDocument(ctxt->userData);
10754
Daniel Veillard5997aca2002-03-18 18:36:20 +000010755 /*
10756 * Remove locally kept entity definitions if the tree was not built
10757 */
10758 if ((ctxt->myDoc != NULL) &&
10759 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10760 xmlFreeDoc(ctxt->myDoc);
10761 ctxt->myDoc = NULL;
10762 }
10763
Daniel Veillardae0765b2008-07-31 19:54:59 +000010764 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10765 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10766 if (ctxt->valid)
10767 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10768 if (ctxt->nsWellFormed)
10769 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10770 if (ctxt->options & XML_PARSE_OLD10)
10771 ctxt->myDoc->properties |= XML_DOC_OLD10;
10772 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010773 if (! ctxt->wellFormed) {
10774 ctxt->valid = 0;
10775 return(-1);
10776 }
Owen Taylor3473f882001-02-23 17:55:21 +000010777 return(0);
10778}
10779
10780/**
10781 * xmlParseExtParsedEnt:
10782 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010783 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010784 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010785 * An external general parsed entity is well-formed if it matches the
10786 * production labeled extParsedEnt.
10787 *
10788 * [78] extParsedEnt ::= TextDecl? content
10789 *
10790 * Returns 0, -1 in case of error. the parser context is augmented
10791 * as a result of the parsing.
10792 */
10793
10794int
10795xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10796 xmlChar start[4];
10797 xmlCharEncoding enc;
10798
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010799 if ((ctxt == NULL) || (ctxt->input == NULL))
10800 return(-1);
10801
Owen Taylor3473f882001-02-23 17:55:21 +000010802 xmlDefaultSAXHandlerInit();
10803
Daniel Veillard309f81d2003-09-23 09:02:53 +000010804 xmlDetectSAX2(ctxt);
10805
Owen Taylor3473f882001-02-23 17:55:21 +000010806 GROW;
10807
10808 /*
10809 * SAX: beginning of the document processing.
10810 */
10811 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10812 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10813
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010814 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010815 * Get the 4 first bytes and decode the charset
10816 * if enc != XML_CHAR_ENCODING_NONE
10817 * plug some encoding conversion routines.
10818 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010819 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10820 start[0] = RAW;
10821 start[1] = NXT(1);
10822 start[2] = NXT(2);
10823 start[3] = NXT(3);
10824 enc = xmlDetectCharEncoding(start, 4);
10825 if (enc != XML_CHAR_ENCODING_NONE) {
10826 xmlSwitchEncoding(ctxt, enc);
10827 }
Owen Taylor3473f882001-02-23 17:55:21 +000010828 }
10829
10830
10831 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010832 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010833 }
10834
10835 /*
10836 * Check for the XMLDecl in the Prolog.
10837 */
10838 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010839 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010840
10841 /*
10842 * Note that we will switch encoding on the fly.
10843 */
10844 xmlParseXMLDecl(ctxt);
10845 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10846 /*
10847 * The XML REC instructs us to stop parsing right here
10848 */
10849 return(-1);
10850 }
10851 SKIP_BLANKS;
10852 } else {
10853 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10854 }
10855 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10856 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010857 if (ctxt->instate == XML_PARSER_EOF)
10858 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010859
10860 /*
10861 * Doing validity checking on chunk doesn't make sense
10862 */
10863 ctxt->instate = XML_PARSER_CONTENT;
10864 ctxt->validate = 0;
10865 ctxt->loadsubset = 0;
10866 ctxt->depth = 0;
10867
10868 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010869 if (ctxt->instate == XML_PARSER_EOF)
10870 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010871
Owen Taylor3473f882001-02-23 17:55:21 +000010872 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010873 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010874 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010875 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010876 }
10877
10878 /*
10879 * SAX: end of the document processing.
10880 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010881 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010882 ctxt->sax->endDocument(ctxt->userData);
10883
10884 if (! ctxt->wellFormed) return(-1);
10885 return(0);
10886}
10887
Daniel Veillard73b013f2003-09-30 12:36:01 +000010888#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010889/************************************************************************
10890 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010891 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010892 * *
10893 ************************************************************************/
10894
10895/**
10896 * xmlParseLookupSequence:
10897 * @ctxt: an XML parser context
10898 * @first: the first char to lookup
10899 * @next: the next char to lookup or zero
10900 * @third: the next char to lookup or zero
10901 *
10902 * Try to find if a sequence (first, next, third) or just (first next) or
10903 * (first) is available in the input stream.
10904 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10905 * to avoid rescanning sequences of bytes, it DOES change the state of the
10906 * parser, do not use liberally.
10907 *
10908 * Returns the index to the current parsing point if the full sequence
10909 * is available, -1 otherwise.
10910 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010911static int
Owen Taylor3473f882001-02-23 17:55:21 +000010912xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10913 xmlChar next, xmlChar third) {
10914 int base, len;
10915 xmlParserInputPtr in;
10916 const xmlChar *buf;
10917
10918 in = ctxt->input;
10919 if (in == NULL) return(-1);
10920 base = in->cur - in->base;
10921 if (base < 0) return(-1);
10922 if (ctxt->checkIndex > base)
10923 base = ctxt->checkIndex;
10924 if (in->buf == NULL) {
10925 buf = in->base;
10926 len = in->length;
10927 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010928 buf = xmlBufContent(in->buf->buffer);
10929 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010930 }
10931 /* take into account the sequence length */
10932 if (third) len -= 2;
10933 else if (next) len --;
10934 for (;base < len;base++) {
10935 if (buf[base] == first) {
10936 if (third != 0) {
10937 if ((buf[base + 1] != next) ||
10938 (buf[base + 2] != third)) continue;
10939 } else if (next != 0) {
10940 if (buf[base + 1] != next) continue;
10941 }
10942 ctxt->checkIndex = 0;
10943#ifdef DEBUG_PUSH
10944 if (next == 0)
10945 xmlGenericError(xmlGenericErrorContext,
10946 "PP: lookup '%c' found at %d\n",
10947 first, base);
10948 else if (third == 0)
10949 xmlGenericError(xmlGenericErrorContext,
10950 "PP: lookup '%c%c' found at %d\n",
10951 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010952 else
Owen Taylor3473f882001-02-23 17:55:21 +000010953 xmlGenericError(xmlGenericErrorContext,
10954 "PP: lookup '%c%c%c' found at %d\n",
10955 first, next, third, base);
10956#endif
10957 return(base - (in->cur - in->base));
10958 }
10959 }
10960 ctxt->checkIndex = base;
10961#ifdef DEBUG_PUSH
10962 if (next == 0)
10963 xmlGenericError(xmlGenericErrorContext,
10964 "PP: lookup '%c' failed\n", first);
10965 else if (third == 0)
10966 xmlGenericError(xmlGenericErrorContext,
10967 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010968 else
Owen Taylor3473f882001-02-23 17:55:21 +000010969 xmlGenericError(xmlGenericErrorContext,
10970 "PP: lookup '%c%c%c' failed\n", first, next, third);
10971#endif
10972 return(-1);
10973}
10974
10975/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010976 * xmlParseGetLasts:
10977 * @ctxt: an XML parser context
10978 * @lastlt: pointer to store the last '<' from the input
10979 * @lastgt: pointer to store the last '>' from the input
10980 *
10981 * Lookup the last < and > in the current chunk
10982 */
10983static void
10984xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10985 const xmlChar **lastgt) {
10986 const xmlChar *tmp;
10987
10988 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10989 xmlGenericError(xmlGenericErrorContext,
10990 "Internal error: xmlParseGetLasts\n");
10991 return;
10992 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010993 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010994 tmp = ctxt->input->end;
10995 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010996 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010997 if (tmp < ctxt->input->base) {
10998 *lastlt = NULL;
10999 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011000 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011001 *lastlt = tmp;
11002 tmp++;
11003 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11004 if (*tmp == '\'') {
11005 tmp++;
11006 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11007 if (tmp < ctxt->input->end) tmp++;
11008 } else if (*tmp == '"') {
11009 tmp++;
11010 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11011 if (tmp < ctxt->input->end) tmp++;
11012 } else
11013 tmp++;
11014 }
11015 if (tmp < ctxt->input->end)
11016 *lastgt = tmp;
11017 else {
11018 tmp = *lastlt;
11019 tmp--;
11020 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11021 if (tmp >= ctxt->input->base)
11022 *lastgt = tmp;
11023 else
11024 *lastgt = NULL;
11025 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011026 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011027 } else {
11028 *lastlt = NULL;
11029 *lastgt = NULL;
11030 }
11031}
11032/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011033 * xmlCheckCdataPush:
11034 * @cur: pointer to the bock of characters
11035 * @len: length of the block in bytes
11036 *
11037 * Check that the block of characters is okay as SCdata content [20]
11038 *
11039 * Returns the number of bytes to pass if okay, a negative index where an
11040 * UTF-8 error occured otherwise
11041 */
11042static int
11043xmlCheckCdataPush(const xmlChar *utf, int len) {
11044 int ix;
11045 unsigned char c;
11046 int codepoint;
11047
11048 if ((utf == NULL) || (len <= 0))
11049 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011050
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011051 for (ix = 0; ix < len;) { /* string is 0-terminated */
11052 c = utf[ix];
11053 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11054 if (c >= 0x20)
11055 ix++;
11056 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11057 ix++;
11058 else
11059 return(-ix);
11060 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11061 if (ix + 2 > len) return(ix);
11062 if ((utf[ix+1] & 0xc0 ) != 0x80)
11063 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011064 codepoint = (utf[ix] & 0x1f) << 6;
11065 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011066 if (!xmlIsCharQ(codepoint))
11067 return(-ix);
11068 ix += 2;
11069 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11070 if (ix + 3 > len) return(ix);
11071 if (((utf[ix+1] & 0xc0) != 0x80) ||
11072 ((utf[ix+2] & 0xc0) != 0x80))
11073 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011074 codepoint = (utf[ix] & 0xf) << 12;
11075 codepoint |= (utf[ix+1] & 0x3f) << 6;
11076 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011077 if (!xmlIsCharQ(codepoint))
11078 return(-ix);
11079 ix += 3;
11080 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11081 if (ix + 4 > len) return(ix);
11082 if (((utf[ix+1] & 0xc0) != 0x80) ||
11083 ((utf[ix+2] & 0xc0) != 0x80) ||
11084 ((utf[ix+3] & 0xc0) != 0x80))
11085 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011086 codepoint = (utf[ix] & 0x7) << 18;
11087 codepoint |= (utf[ix+1] & 0x3f) << 12;
11088 codepoint |= (utf[ix+2] & 0x3f) << 6;
11089 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011090 if (!xmlIsCharQ(codepoint))
11091 return(-ix);
11092 ix += 4;
11093 } else /* unknown encoding */
11094 return(-ix);
11095 }
11096 return(ix);
11097}
11098
11099/**
Owen Taylor3473f882001-02-23 17:55:21 +000011100 * xmlParseTryOrFinish:
11101 * @ctxt: an XML parser context
11102 * @terminate: last chunk indicator
11103 *
11104 * Try to progress on parsing
11105 *
11106 * Returns zero if no parsing was possible
11107 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011108static int
Owen Taylor3473f882001-02-23 17:55:21 +000011109xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11110 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011111 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011112 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011113 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011114
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011115 if (ctxt->input == NULL)
11116 return(0);
11117
Owen Taylor3473f882001-02-23 17:55:21 +000011118#ifdef DEBUG_PUSH
11119 switch (ctxt->instate) {
11120 case XML_PARSER_EOF:
11121 xmlGenericError(xmlGenericErrorContext,
11122 "PP: try EOF\n"); break;
11123 case XML_PARSER_START:
11124 xmlGenericError(xmlGenericErrorContext,
11125 "PP: try START\n"); break;
11126 case XML_PARSER_MISC:
11127 xmlGenericError(xmlGenericErrorContext,
11128 "PP: try MISC\n");break;
11129 case XML_PARSER_COMMENT:
11130 xmlGenericError(xmlGenericErrorContext,
11131 "PP: try COMMENT\n");break;
11132 case XML_PARSER_PROLOG:
11133 xmlGenericError(xmlGenericErrorContext,
11134 "PP: try PROLOG\n");break;
11135 case XML_PARSER_START_TAG:
11136 xmlGenericError(xmlGenericErrorContext,
11137 "PP: try START_TAG\n");break;
11138 case XML_PARSER_CONTENT:
11139 xmlGenericError(xmlGenericErrorContext,
11140 "PP: try CONTENT\n");break;
11141 case XML_PARSER_CDATA_SECTION:
11142 xmlGenericError(xmlGenericErrorContext,
11143 "PP: try CDATA_SECTION\n");break;
11144 case XML_PARSER_END_TAG:
11145 xmlGenericError(xmlGenericErrorContext,
11146 "PP: try END_TAG\n");break;
11147 case XML_PARSER_ENTITY_DECL:
11148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: try ENTITY_DECL\n");break;
11150 case XML_PARSER_ENTITY_VALUE:
11151 xmlGenericError(xmlGenericErrorContext,
11152 "PP: try ENTITY_VALUE\n");break;
11153 case XML_PARSER_ATTRIBUTE_VALUE:
11154 xmlGenericError(xmlGenericErrorContext,
11155 "PP: try ATTRIBUTE_VALUE\n");break;
11156 case XML_PARSER_DTD:
11157 xmlGenericError(xmlGenericErrorContext,
11158 "PP: try DTD\n");break;
11159 case XML_PARSER_EPILOG:
11160 xmlGenericError(xmlGenericErrorContext,
11161 "PP: try EPILOG\n");break;
11162 case XML_PARSER_PI:
11163 xmlGenericError(xmlGenericErrorContext,
11164 "PP: try PI\n");break;
11165 case XML_PARSER_IGNORE:
11166 xmlGenericError(xmlGenericErrorContext,
11167 "PP: try IGNORE\n");break;
11168 }
11169#endif
11170
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011171 if ((ctxt->input != NULL) &&
11172 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011173 xmlSHRINK(ctxt);
11174 ctxt->checkIndex = 0;
11175 }
11176 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011177
Daniel Veillarde50ba812013-04-11 15:54:51 +080011178 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011179 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011180 return(0);
11181
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011182
Owen Taylor3473f882001-02-23 17:55:21 +000011183 /*
11184 * Pop-up of finished entities.
11185 */
11186 while ((RAW == 0) && (ctxt->inputNr > 1))
11187 xmlPopInput(ctxt);
11188
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011189 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011190 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011191 avail = ctxt->input->length -
11192 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011193 else {
11194 /*
11195 * If we are operating on converted input, try to flush
11196 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011197 * buffer. But do not do this in document start where
11198 * encoding="..." may not have been read and we work on a
11199 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011200 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011201 if ((ctxt->instate != XML_PARSER_START) &&
11202 (ctxt->input->buf->raw != NULL) &&
11203 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011204 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11205 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011206 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011207
11208 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011209 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11210 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011211 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011212 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011213 (ctxt->input->cur - ctxt->input->base);
11214 }
Owen Taylor3473f882001-02-23 17:55:21 +000011215 if (avail < 1)
11216 goto done;
11217 switch (ctxt->instate) {
11218 case XML_PARSER_EOF:
11219 /*
11220 * Document parsing is done !
11221 */
11222 goto done;
11223 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011224 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11225 xmlChar start[4];
11226 xmlCharEncoding enc;
11227
11228 /*
11229 * Very first chars read from the document flow.
11230 */
11231 if (avail < 4)
11232 goto done;
11233
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011234 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011235 * Get the 4 first bytes and decode the charset
11236 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011237 * plug some encoding conversion routines,
11238 * else xmlSwitchEncoding will set to (default)
11239 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011240 */
11241 start[0] = RAW;
11242 start[1] = NXT(1);
11243 start[2] = NXT(2);
11244 start[3] = NXT(3);
11245 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011246 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011247 break;
11248 }
Owen Taylor3473f882001-02-23 17:55:21 +000011249
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011250 if (avail < 2)
11251 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011252 cur = ctxt->input->cur[0];
11253 next = ctxt->input->cur[1];
11254 if (cur == 0) {
11255 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11256 ctxt->sax->setDocumentLocator(ctxt->userData,
11257 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011258 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011259 ctxt->instate = XML_PARSER_EOF;
11260#ifdef DEBUG_PUSH
11261 xmlGenericError(xmlGenericErrorContext,
11262 "PP: entering EOF\n");
11263#endif
11264 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11265 ctxt->sax->endDocument(ctxt->userData);
11266 goto done;
11267 }
11268 if ((cur == '<') && (next == '?')) {
11269 /* PI or XML decl */
11270 if (avail < 5) return(ret);
11271 if ((!terminate) &&
11272 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11273 return(ret);
11274 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11275 ctxt->sax->setDocumentLocator(ctxt->userData,
11276 &xmlDefaultSAXLocator);
11277 if ((ctxt->input->cur[2] == 'x') &&
11278 (ctxt->input->cur[3] == 'm') &&
11279 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011280 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011281 ret += 5;
11282#ifdef DEBUG_PUSH
11283 xmlGenericError(xmlGenericErrorContext,
11284 "PP: Parsing XML Decl\n");
11285#endif
11286 xmlParseXMLDecl(ctxt);
11287 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11288 /*
11289 * The XML REC instructs us to stop parsing right
11290 * here
11291 */
11292 ctxt->instate = XML_PARSER_EOF;
11293 return(0);
11294 }
11295 ctxt->standalone = ctxt->input->standalone;
11296 if ((ctxt->encoding == NULL) &&
11297 (ctxt->input->encoding != NULL))
11298 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11299 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11300 (!ctxt->disableSAX))
11301 ctxt->sax->startDocument(ctxt->userData);
11302 ctxt->instate = XML_PARSER_MISC;
11303#ifdef DEBUG_PUSH
11304 xmlGenericError(xmlGenericErrorContext,
11305 "PP: entering MISC\n");
11306#endif
11307 } else {
11308 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11309 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11310 (!ctxt->disableSAX))
11311 ctxt->sax->startDocument(ctxt->userData);
11312 ctxt->instate = XML_PARSER_MISC;
11313#ifdef DEBUG_PUSH
11314 xmlGenericError(xmlGenericErrorContext,
11315 "PP: entering MISC\n");
11316#endif
11317 }
11318 } else {
11319 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11320 ctxt->sax->setDocumentLocator(ctxt->userData,
11321 &xmlDefaultSAXLocator);
11322 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011323 if (ctxt->version == NULL) {
11324 xmlErrMemory(ctxt, NULL);
11325 break;
11326 }
Owen Taylor3473f882001-02-23 17:55:21 +000011327 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11328 (!ctxt->disableSAX))
11329 ctxt->sax->startDocument(ctxt->userData);
11330 ctxt->instate = XML_PARSER_MISC;
11331#ifdef DEBUG_PUSH
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: entering MISC\n");
11334#endif
11335 }
11336 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011337 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011338 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011339 const xmlChar *prefix = NULL;
11340 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011341 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011342
11343 if ((avail < 2) && (ctxt->inputNr == 1))
11344 goto done;
11345 cur = ctxt->input->cur[0];
11346 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011347 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011348 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011349 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11350 ctxt->sax->endDocument(ctxt->userData);
11351 goto done;
11352 }
11353 if (!terminate) {
11354 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011355 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011356 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011357 goto done;
11358 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11359 goto done;
11360 }
11361 }
11362 if (ctxt->spaceNr == 0)
11363 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011364 else if (*ctxt->space == -2)
11365 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011366 else
11367 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011368#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011369 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011370#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011371 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011372#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011373 else
11374 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011375#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011376 if (ctxt->instate == XML_PARSER_EOF)
11377 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011378 if (name == NULL) {
11379 spacePop(ctxt);
11380 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011381 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11382 ctxt->sax->endDocument(ctxt->userData);
11383 goto done;
11384 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011385#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011386 /*
11387 * [ VC: Root Element Type ]
11388 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011389 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011390 */
11391 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11392 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11393 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011394#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011395
11396 /*
11397 * Check for an Empty Element.
11398 */
11399 if ((RAW == '/') && (NXT(1) == '>')) {
11400 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011401
11402 if (ctxt->sax2) {
11403 if ((ctxt->sax != NULL) &&
11404 (ctxt->sax->endElementNs != NULL) &&
11405 (!ctxt->disableSAX))
11406 ctxt->sax->endElementNs(ctxt->userData, name,
11407 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011408 if (ctxt->nsNr - nsNr > 0)
11409 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011410#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011411 } else {
11412 if ((ctxt->sax != NULL) &&
11413 (ctxt->sax->endElement != NULL) &&
11414 (!ctxt->disableSAX))
11415 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011416#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011417 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011418 if (ctxt->instate == XML_PARSER_EOF)
11419 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011420 spacePop(ctxt);
11421 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011422 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011423 } else {
11424 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011425 }
Daniel Veillard65686452012-07-19 18:25:01 +080011426 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011427 break;
11428 }
11429 if (RAW == '>') {
11430 NEXT;
11431 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011432 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011433 "Couldn't find end of Start Tag %s\n",
11434 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011435 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011436 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011437 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011438 if (ctxt->sax2)
11439 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011440#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011441 else
11442 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011443#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011444
Daniel Veillarda880b122003-04-21 21:36:41 +000011445 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011446 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011447 break;
11448 }
11449 case XML_PARSER_CONTENT: {
11450 const xmlChar *test;
11451 unsigned int cons;
11452 if ((avail < 2) && (ctxt->inputNr == 1))
11453 goto done;
11454 cur = ctxt->input->cur[0];
11455 next = ctxt->input->cur[1];
11456
11457 test = CUR_PTR;
11458 cons = ctxt->input->consumed;
11459 if ((cur == '<') && (next == '/')) {
11460 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011461 break;
11462 } else if ((cur == '<') && (next == '?')) {
11463 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011464 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11465 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011466 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011467 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011468 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011469 ctxt->instate = XML_PARSER_CONTENT;
11470 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011471 } else if ((cur == '<') && (next != '!')) {
11472 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011473 break;
11474 } else if ((cur == '<') && (next == '!') &&
11475 (ctxt->input->cur[2] == '-') &&
11476 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011477 int term;
11478
11479 if (avail < 4)
11480 goto done;
11481 ctxt->input->cur += 4;
11482 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11483 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011484 if ((!terminate) && (term < 0)) {
11485 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011486 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011487 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011488 xmlParseComment(ctxt);
11489 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011490 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011491 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11492 (ctxt->input->cur[2] == '[') &&
11493 (ctxt->input->cur[3] == 'C') &&
11494 (ctxt->input->cur[4] == 'D') &&
11495 (ctxt->input->cur[5] == 'A') &&
11496 (ctxt->input->cur[6] == 'T') &&
11497 (ctxt->input->cur[7] == 'A') &&
11498 (ctxt->input->cur[8] == '[')) {
11499 SKIP(9);
11500 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011501 break;
11502 } else if ((cur == '<') && (next == '!') &&
11503 (avail < 9)) {
11504 goto done;
11505 } else if (cur == '&') {
11506 if ((!terminate) &&
11507 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11508 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011509 xmlParseReference(ctxt);
11510 } else {
11511 /* TODO Avoid the extra copy, handle directly !!! */
11512 /*
11513 * Goal of the following test is:
11514 * - minimize calls to the SAX 'character' callback
11515 * when they are mergeable
11516 * - handle an problem for isBlank when we only parse
11517 * a sequence of blank chars and the next one is
11518 * not available to check against '<' presence.
11519 * - tries to homogenize the differences in SAX
11520 * callbacks between the push and pull versions
11521 * of the parser.
11522 */
11523 if ((ctxt->inputNr == 1) &&
11524 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11525 if (!terminate) {
11526 if (ctxt->progressive) {
11527 if ((lastlt == NULL) ||
11528 (ctxt->input->cur > lastlt))
11529 goto done;
11530 } else if (xmlParseLookupSequence(ctxt,
11531 '<', 0, 0) < 0) {
11532 goto done;
11533 }
11534 }
11535 }
11536 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011537 xmlParseCharData(ctxt, 0);
11538 }
11539 /*
11540 * Pop-up of finished entities.
11541 */
11542 while ((RAW == 0) && (ctxt->inputNr > 1))
11543 xmlPopInput(ctxt);
11544 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011545 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11546 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011547 ctxt->instate = XML_PARSER_EOF;
11548 break;
11549 }
11550 break;
11551 }
11552 case XML_PARSER_END_TAG:
11553 if (avail < 2)
11554 goto done;
11555 if (!terminate) {
11556 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011557 /* > can be found unescaped in attribute values */
11558 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011559 goto done;
11560 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11561 goto done;
11562 }
11563 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011564 if (ctxt->sax2) {
11565 xmlParseEndTag2(ctxt,
11566 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11567 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011568 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011569 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011570 }
11571#ifdef LIBXML_SAX1_ENABLED
11572 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011573 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011574#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011575 if (ctxt->instate == XML_PARSER_EOF) {
11576 /* Nothing */
11577 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011578 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011579 } else {
11580 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011581 }
11582 break;
11583 case XML_PARSER_CDATA_SECTION: {
11584 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011585 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011586 * cdataBlock merge back contiguous callbacks.
11587 */
11588 int base;
11589
11590 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11591 if (base < 0) {
11592 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011593 int tmp;
11594
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011595 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011596 XML_PARSER_BIG_BUFFER_SIZE);
11597 if (tmp < 0) {
11598 tmp = -tmp;
11599 ctxt->input->cur += tmp;
11600 goto encoding_error;
11601 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011602 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11603 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011604 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011605 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011606 else if (ctxt->sax->characters != NULL)
11607 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011608 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011609 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011610 if (ctxt->instate == XML_PARSER_EOF)
11611 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011612 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011613 ctxt->checkIndex = 0;
11614 }
11615 goto done;
11616 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011617 int tmp;
11618
11619 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11620 if ((tmp < 0) || (tmp != base)) {
11621 tmp = -tmp;
11622 ctxt->input->cur += tmp;
11623 goto encoding_error;
11624 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011625 if ((ctxt->sax != NULL) && (base == 0) &&
11626 (ctxt->sax->cdataBlock != NULL) &&
11627 (!ctxt->disableSAX)) {
11628 /*
11629 * Special case to provide identical behaviour
11630 * between pull and push parsers on enpty CDATA
11631 * sections
11632 */
11633 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11634 (!strncmp((const char *)&ctxt->input->cur[-9],
11635 "<![CDATA[", 9)))
11636 ctxt->sax->cdataBlock(ctxt->userData,
11637 BAD_CAST "", 0);
11638 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011639 (!ctxt->disableSAX)) {
11640 if (ctxt->sax->cdataBlock != NULL)
11641 ctxt->sax->cdataBlock(ctxt->userData,
11642 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011643 else if (ctxt->sax->characters != NULL)
11644 ctxt->sax->characters(ctxt->userData,
11645 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011646 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011647 if (ctxt->instate == XML_PARSER_EOF)
11648 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011649 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011650 ctxt->checkIndex = 0;
11651 ctxt->instate = XML_PARSER_CONTENT;
11652#ifdef DEBUG_PUSH
11653 xmlGenericError(xmlGenericErrorContext,
11654 "PP: entering CONTENT\n");
11655#endif
11656 }
11657 break;
11658 }
Owen Taylor3473f882001-02-23 17:55:21 +000011659 case XML_PARSER_MISC:
11660 SKIP_BLANKS;
11661 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011662 avail = ctxt->input->length -
11663 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011664 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011665 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011666 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011667 if (avail < 2)
11668 goto done;
11669 cur = ctxt->input->cur[0];
11670 next = ctxt->input->cur[1];
11671 if ((cur == '<') && (next == '?')) {
11672 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011673 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11674 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011675 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011676 }
Owen Taylor3473f882001-02-23 17:55:21 +000011677#ifdef DEBUG_PUSH
11678 xmlGenericError(xmlGenericErrorContext,
11679 "PP: Parsing PI\n");
11680#endif
11681 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011682 if (ctxt->instate == XML_PARSER_EOF)
11683 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011684 ctxt->instate = XML_PARSER_MISC;
11685 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011686 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011687 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011688 (ctxt->input->cur[2] == '-') &&
11689 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011690 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011691 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11692 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011693 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011694 }
Owen Taylor3473f882001-02-23 17:55:21 +000011695#ifdef DEBUG_PUSH
11696 xmlGenericError(xmlGenericErrorContext,
11697 "PP: Parsing Comment\n");
11698#endif
11699 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011700 if (ctxt->instate == XML_PARSER_EOF)
11701 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011702 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011703 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011704 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011705 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011706 (ctxt->input->cur[2] == 'D') &&
11707 (ctxt->input->cur[3] == 'O') &&
11708 (ctxt->input->cur[4] == 'C') &&
11709 (ctxt->input->cur[5] == 'T') &&
11710 (ctxt->input->cur[6] == 'Y') &&
11711 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011712 (ctxt->input->cur[8] == 'E')) {
11713 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011714 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11715 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011716 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011717 }
Owen Taylor3473f882001-02-23 17:55:21 +000011718#ifdef DEBUG_PUSH
11719 xmlGenericError(xmlGenericErrorContext,
11720 "PP: Parsing internal subset\n");
11721#endif
11722 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011723 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011724 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011725 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011726 if (ctxt->instate == XML_PARSER_EOF)
11727 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011728 if (RAW == '[') {
11729 ctxt->instate = XML_PARSER_DTD;
11730#ifdef DEBUG_PUSH
11731 xmlGenericError(xmlGenericErrorContext,
11732 "PP: entering DTD\n");
11733#endif
11734 } else {
11735 /*
11736 * Create and update the external subset.
11737 */
11738 ctxt->inSubset = 2;
11739 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11740 (ctxt->sax->externalSubset != NULL))
11741 ctxt->sax->externalSubset(ctxt->userData,
11742 ctxt->intSubName, ctxt->extSubSystem,
11743 ctxt->extSubURI);
11744 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011745 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011746 ctxt->instate = XML_PARSER_PROLOG;
11747#ifdef DEBUG_PUSH
11748 xmlGenericError(xmlGenericErrorContext,
11749 "PP: entering PROLOG\n");
11750#endif
11751 }
11752 } else if ((cur == '<') && (next == '!') &&
11753 (avail < 9)) {
11754 goto done;
11755 } else {
11756 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011757 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011758 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011759#ifdef DEBUG_PUSH
11760 xmlGenericError(xmlGenericErrorContext,
11761 "PP: entering START_TAG\n");
11762#endif
11763 }
11764 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011765 case XML_PARSER_PROLOG:
11766 SKIP_BLANKS;
11767 if (ctxt->input->buf == NULL)
11768 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11769 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011770 avail = xmlBufUse(ctxt->input->buf->buffer) -
11771 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011772 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011773 goto done;
11774 cur = ctxt->input->cur[0];
11775 next = ctxt->input->cur[1];
11776 if ((cur == '<') && (next == '?')) {
11777 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011778 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11779 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011780 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011781 }
Owen Taylor3473f882001-02-23 17:55:21 +000011782#ifdef DEBUG_PUSH
11783 xmlGenericError(xmlGenericErrorContext,
11784 "PP: Parsing PI\n");
11785#endif
11786 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011787 if (ctxt->instate == XML_PARSER_EOF)
11788 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011789 ctxt->instate = XML_PARSER_PROLOG;
11790 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011791 } else if ((cur == '<') && (next == '!') &&
11792 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11793 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011794 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11795 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011796 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011797 }
Owen Taylor3473f882001-02-23 17:55:21 +000011798#ifdef DEBUG_PUSH
11799 xmlGenericError(xmlGenericErrorContext,
11800 "PP: Parsing Comment\n");
11801#endif
11802 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011803 if (ctxt->instate == XML_PARSER_EOF)
11804 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011805 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011806 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011807 } else if ((cur == '<') && (next == '!') &&
11808 (avail < 4)) {
11809 goto done;
11810 } else {
11811 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011812 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011813 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011814 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011815#ifdef DEBUG_PUSH
11816 xmlGenericError(xmlGenericErrorContext,
11817 "PP: entering START_TAG\n");
11818#endif
11819 }
11820 break;
11821 case XML_PARSER_EPILOG:
11822 SKIP_BLANKS;
11823 if (ctxt->input->buf == NULL)
11824 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11825 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011826 avail = xmlBufUse(ctxt->input->buf->buffer) -
11827 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011828 if (avail < 2)
11829 goto done;
11830 cur = ctxt->input->cur[0];
11831 next = ctxt->input->cur[1];
11832 if ((cur == '<') && (next == '?')) {
11833 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011834 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11835 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011836 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011837 }
Owen Taylor3473f882001-02-23 17:55:21 +000011838#ifdef DEBUG_PUSH
11839 xmlGenericError(xmlGenericErrorContext,
11840 "PP: Parsing PI\n");
11841#endif
11842 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011843 if (ctxt->instate == XML_PARSER_EOF)
11844 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011845 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011846 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011847 } else if ((cur == '<') && (next == '!') &&
11848 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11849 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011850 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11851 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011852 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011853 }
Owen Taylor3473f882001-02-23 17:55:21 +000011854#ifdef DEBUG_PUSH
11855 xmlGenericError(xmlGenericErrorContext,
11856 "PP: Parsing Comment\n");
11857#endif
11858 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011859 if (ctxt->instate == XML_PARSER_EOF)
11860 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011861 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011862 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011863 } else if ((cur == '<') && (next == '!') &&
11864 (avail < 4)) {
11865 goto done;
11866 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011867 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011868 ctxt->instate = XML_PARSER_EOF;
11869#ifdef DEBUG_PUSH
11870 xmlGenericError(xmlGenericErrorContext,
11871 "PP: entering EOF\n");
11872#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011873 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011874 ctxt->sax->endDocument(ctxt->userData);
11875 goto done;
11876 }
11877 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011878 case XML_PARSER_DTD: {
11879 /*
11880 * Sorry but progressive parsing of the internal subset
11881 * is not expected to be supported. We first check that
11882 * the full content of the internal subset is available and
11883 * the parsing is launched only at that point.
11884 * Internal subset ends up with "']' S? '>'" in an unescaped
11885 * section and not in a ']]>' sequence which are conditional
11886 * sections (whoever argued to keep that crap in XML deserve
11887 * a place in hell !).
11888 */
11889 int base, i;
11890 xmlChar *buf;
11891 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011892 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011893
11894 base = ctxt->input->cur - ctxt->input->base;
11895 if (base < 0) return(0);
11896 if (ctxt->checkIndex > base)
11897 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011898 buf = xmlBufContent(ctxt->input->buf->buffer);
11899 use = xmlBufUse(ctxt->input->buf->buffer);
11900 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011901 if (quote != 0) {
11902 if (buf[base] == quote)
11903 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011904 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011905 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011906 if ((quote == 0) && (buf[base] == '<')) {
11907 int found = 0;
11908 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011909 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011910 (buf[base + 1] == '!') &&
11911 (buf[base + 2] == '-') &&
11912 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011913 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011914 if ((buf[base] == '-') &&
11915 (buf[base + 1] == '-') &&
11916 (buf[base + 2] == '>')) {
11917 found = 1;
11918 base += 2;
11919 break;
11920 }
11921 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011922 if (!found) {
11923#if 0
11924 fprintf(stderr, "unfinished comment\n");
11925#endif
11926 break; /* for */
11927 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011928 continue;
11929 }
11930 }
Owen Taylor3473f882001-02-23 17:55:21 +000011931 if (buf[base] == '"') {
11932 quote = '"';
11933 continue;
11934 }
11935 if (buf[base] == '\'') {
11936 quote = '\'';
11937 continue;
11938 }
11939 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011940#if 0
11941 fprintf(stderr, "%c%c%c%c: ", buf[base],
11942 buf[base + 1], buf[base + 2], buf[base + 3]);
11943#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011944 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011945 break;
11946 if (buf[base + 1] == ']') {
11947 /* conditional crap, skip both ']' ! */
11948 base++;
11949 continue;
11950 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011951 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011952 if (buf[base + i] == '>') {
11953#if 0
11954 fprintf(stderr, "found\n");
11955#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011956 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011957 }
11958 if (!IS_BLANK_CH(buf[base + i])) {
11959#if 0
11960 fprintf(stderr, "not found\n");
11961#endif
11962 goto not_end_of_int_subset;
11963 }
Owen Taylor3473f882001-02-23 17:55:21 +000011964 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011965#if 0
11966 fprintf(stderr, "end of stream\n");
11967#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011968 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011969
Owen Taylor3473f882001-02-23 17:55:21 +000011970 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011971not_end_of_int_subset:
11972 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011973 }
11974 /*
11975 * We didn't found the end of the Internal subset
11976 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011977 if (quote == 0)
11978 ctxt->checkIndex = base;
11979 else
11980 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011981#ifdef DEBUG_PUSH
11982 if (next == 0)
11983 xmlGenericError(xmlGenericErrorContext,
11984 "PP: lookup of int subset end filed\n");
11985#endif
11986 goto done;
11987
11988found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011989 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011990 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011991 if (ctxt->instate == XML_PARSER_EOF)
11992 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011993 ctxt->inSubset = 2;
11994 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11995 (ctxt->sax->externalSubset != NULL))
11996 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11997 ctxt->extSubSystem, ctxt->extSubURI);
11998 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011999 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012000 if (ctxt->instate == XML_PARSER_EOF)
12001 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012002 ctxt->instate = XML_PARSER_PROLOG;
12003 ctxt->checkIndex = 0;
12004#ifdef DEBUG_PUSH
12005 xmlGenericError(xmlGenericErrorContext,
12006 "PP: entering PROLOG\n");
12007#endif
12008 break;
12009 }
12010 case XML_PARSER_COMMENT:
12011 xmlGenericError(xmlGenericErrorContext,
12012 "PP: internal error, state == COMMENT\n");
12013 ctxt->instate = XML_PARSER_CONTENT;
12014#ifdef DEBUG_PUSH
12015 xmlGenericError(xmlGenericErrorContext,
12016 "PP: entering CONTENT\n");
12017#endif
12018 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012019 case XML_PARSER_IGNORE:
12020 xmlGenericError(xmlGenericErrorContext,
12021 "PP: internal error, state == IGNORE");
12022 ctxt->instate = XML_PARSER_DTD;
12023#ifdef DEBUG_PUSH
12024 xmlGenericError(xmlGenericErrorContext,
12025 "PP: entering DTD\n");
12026#endif
12027 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012028 case XML_PARSER_PI:
12029 xmlGenericError(xmlGenericErrorContext,
12030 "PP: internal error, state == PI\n");
12031 ctxt->instate = XML_PARSER_CONTENT;
12032#ifdef DEBUG_PUSH
12033 xmlGenericError(xmlGenericErrorContext,
12034 "PP: entering CONTENT\n");
12035#endif
12036 break;
12037 case XML_PARSER_ENTITY_DECL:
12038 xmlGenericError(xmlGenericErrorContext,
12039 "PP: internal error, state == ENTITY_DECL\n");
12040 ctxt->instate = XML_PARSER_DTD;
12041#ifdef DEBUG_PUSH
12042 xmlGenericError(xmlGenericErrorContext,
12043 "PP: entering DTD\n");
12044#endif
12045 break;
12046 case XML_PARSER_ENTITY_VALUE:
12047 xmlGenericError(xmlGenericErrorContext,
12048 "PP: internal error, state == ENTITY_VALUE\n");
12049 ctxt->instate = XML_PARSER_CONTENT;
12050#ifdef DEBUG_PUSH
12051 xmlGenericError(xmlGenericErrorContext,
12052 "PP: entering DTD\n");
12053#endif
12054 break;
12055 case XML_PARSER_ATTRIBUTE_VALUE:
12056 xmlGenericError(xmlGenericErrorContext,
12057 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12058 ctxt->instate = XML_PARSER_START_TAG;
12059#ifdef DEBUG_PUSH
12060 xmlGenericError(xmlGenericErrorContext,
12061 "PP: entering START_TAG\n");
12062#endif
12063 break;
12064 case XML_PARSER_SYSTEM_LITERAL:
12065 xmlGenericError(xmlGenericErrorContext,
12066 "PP: internal error, state == SYSTEM_LITERAL\n");
12067 ctxt->instate = XML_PARSER_START_TAG;
12068#ifdef DEBUG_PUSH
12069 xmlGenericError(xmlGenericErrorContext,
12070 "PP: entering START_TAG\n");
12071#endif
12072 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012073 case XML_PARSER_PUBLIC_LITERAL:
12074 xmlGenericError(xmlGenericErrorContext,
12075 "PP: internal error, state == PUBLIC_LITERAL\n");
12076 ctxt->instate = XML_PARSER_START_TAG;
12077#ifdef DEBUG_PUSH
12078 xmlGenericError(xmlGenericErrorContext,
12079 "PP: entering START_TAG\n");
12080#endif
12081 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012082 }
12083 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012084done:
Owen Taylor3473f882001-02-23 17:55:21 +000012085#ifdef DEBUG_PUSH
12086 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12087#endif
12088 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012089encoding_error:
12090 {
12091 char buffer[150];
12092
12093 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12094 ctxt->input->cur[0], ctxt->input->cur[1],
12095 ctxt->input->cur[2], ctxt->input->cur[3]);
12096 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12097 "Input is not proper UTF-8, indicate encoding !\n%s",
12098 BAD_CAST buffer, NULL);
12099 }
12100 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012101}
12102
12103/**
Daniel Veillard65686452012-07-19 18:25:01 +080012104 * xmlParseCheckTransition:
12105 * @ctxt: an XML parser context
12106 * @chunk: a char array
12107 * @size: the size in byte of the chunk
12108 *
12109 * Check depending on the current parser state if the chunk given must be
12110 * processed immediately or one need more data to advance on parsing.
12111 *
12112 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12113 */
12114static int
12115xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12116 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12117 return(-1);
12118 if (ctxt->instate == XML_PARSER_START_TAG) {
12119 if (memchr(chunk, '>', size) != NULL)
12120 return(1);
12121 return(0);
12122 }
12123 if (ctxt->progressive == XML_PARSER_COMMENT) {
12124 if (memchr(chunk, '>', size) != NULL)
12125 return(1);
12126 return(0);
12127 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012128 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12129 if (memchr(chunk, '>', size) != NULL)
12130 return(1);
12131 return(0);
12132 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012133 if (ctxt->progressive == XML_PARSER_PI) {
12134 if (memchr(chunk, '>', size) != NULL)
12135 return(1);
12136 return(0);
12137 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012138 if (ctxt->instate == XML_PARSER_END_TAG) {
12139 if (memchr(chunk, '>', size) != NULL)
12140 return(1);
12141 return(0);
12142 }
12143 if ((ctxt->progressive == XML_PARSER_DTD) ||
12144 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012145 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012146 return(1);
12147 return(0);
12148 }
Daniel Veillard65686452012-07-19 18:25:01 +080012149 return(1);
12150}
12151
12152/**
 * xmlParseChunk:
 * @ctxt: an XML parser context
 * @chunk: a char array
 * @size: the size in bytes of the chunk
 * @terminate: last chunk indicator
 *
 * Parse a chunk of memory in push mode: the bytes are appended to the
 * input buffer of @ctxt and the push parser is run on what is available.
 * When @terminate is non-zero the end of the document is checked and the
 * endDocument SAX callback is invoked.
 *
 * Early exits: XML_ERR_INTERNAL_ERROR if @ctxt is NULL, the stored errNo
 * if the context already has an error with SAX disabled, and -1 if the
 * parser is already in the XML_PARSER_EOF state.
 *
 * Returns zero if no error, the xmlParserErrors otherwise.
 */
12163int
12164xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12165 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012166 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012167 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012168 size_t old_avail = 0;
12169 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012170
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012171 if (ctxt == NULL)
12172 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012173 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012174 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012175 if (ctxt->instate == XML_PARSER_EOF)
12176 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012177 if (ctxt->instate == XML_PARSER_START)
12178 xmlDetectSAX2(ctxt);
    /*
     * A trailing '\r' on a non-final chunk is held back here (despite the
     * flag's name it is a carriage return that is tested) and is pushed
     * on its own once the rest of the chunk has been handled, see the
     * end_in_lf block further down.
     */
Daniel Veillarda617e242006-01-09 14:38:44 +000012179 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12180 (chunk[size - 1] == '\r')) {
12181 end_in_lf = 1;
12182 size--;
12183 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012184
    /*
     * Re-entry point: when only the head of the chunk was pushed on the
     * first pass, the code jumps back here to push the 'remain' bytes.
     */
12185xmldecl_done:
12186
Owen Taylor3473f882001-02-23 17:55:21 +000012187 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12188 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
        /* remember base/cur offsets so they can be restored after the push */
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012189 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12190 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012191 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012192
Daniel Veillard65686452012-07-19 18:25:01 +080012193 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012194 /*
12195 * Specific handling if we autodetected an encoding, we should not
12196 * push more than the first line ... which depend on the encoding
12197 * And only push the rest once the final encoding was detected
12198 */
12199 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12200 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
            /* byte budget for the first push: 45, or 90 / 180 for the
             * UTF-16 / UCS-4 encoder names checked below */
Nikolay Sivov73046832010-01-19 15:38:05 +010012201 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012202
12203 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12204 BAD_CAST "UTF-16")) ||
12205 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12206 BAD_CAST "UTF16")))
12207 len = 90;
12208 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12209 BAD_CAST "UCS-4")) ||
12210 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12211 BAD_CAST "UCS4")))
12212 len = 180;
12213
            /* bytes already consumed count against the budget */
12214 if (ctxt->input->buf->rawconsumed < len)
12215 len -= ctxt->input->buf->rawconsumed;
12216
Raul Hudeaba9716a2010-03-15 10:13:29 +010012217 /*
12218 * Change size for reading the initial declaration only
12219 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12220 * will blindly copy extra bytes from memory.
12221 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012222 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012223 remain = size - len;
12224 size = len;
12225 } else {
12226 remain = 0;
12227 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012228 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012229 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012230 if (res < 0) {
            /*
             * NOTE(review): XML_PARSER_EOF is a parser state constant; it
             * is stored in errNo and returned as the error code here.
             * Confirm callers rely on this value before changing it.
             */
12231 ctxt->errNo = XML_PARSER_EOF;
12232 ctxt->disableSAX = 1;
12233 return (XML_PARSER_EOF);
12234 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012235 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012236#ifdef DEBUG_PUSH
12237 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12238#endif
12239
Owen Taylor3473f882001-02-23 17:55:21 +000012240 } else if (ctxt->instate != XML_PARSER_EOF) {
        /*
         * Nothing was pushed on this pass: if the input buffer has an
         * encoder together with both a converted and a raw buffer, run
         * the encoder on it.
         */
12241 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12242 xmlParserInputBufferPtr in = ctxt->input->buf;
12243 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12244 (in->raw != NULL)) {
12245 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012246 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12247 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012248
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012249 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012250 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012251 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012252 xmlGenericError(xmlGenericErrorContext,
12253 "xmlParseChunk: encoder error\n");
12254 return(XML_ERR_INVALID_ENCODING);
12255 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012256 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012257 }
12258 }
12259 }
    /*
     * If part of the chunk is still held back, parse what was pushed
     * without the terminate flag; the remainder follows on the next pass.
     */
Daniel Veillard65686452012-07-19 18:25:01 +080012260 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012261 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012262 } else {
12263 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12264 avail = xmlBufUse(ctxt->input->buf->buffer);
12265 /*
12266 * Depending on the current state it may not be such
12267 * a good idea to try parsing if there is nothing in the chunk
12268 * which would be worth doing a parser state transition and we
12269 * need to wait for more data
12270 */
12271 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12272 (old_avail == 0) || (avail == 0) ||
12273 (xmlParseCheckTransition(ctxt,
12274 (const char *)&ctxt->input->base[old_avail],
12275 avail - old_avail)))
12276 xmlParseTryOrFinish(ctxt, terminate);
12277 }
    /* The parser reached EOF during this pass: report the stored error
     * code without looking at the input pointers again. */
Daniel Veillarde50ba812013-04-11 15:54:51 +080012278 if (ctxt->instate == XML_PARSER_EOF)
12279 return(ctxt->errNo);
12280
    /* Unless XML_PARSE_HUGE is set, refuse to keep more than
     * XML_MAX_LOOKUP_LIMIT bytes before or after the current position. */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012281 if ((ctxt->input != NULL) &&
12282 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12283 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12284 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12285 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12286 ctxt->instate = XML_PARSER_EOF;
12287 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012288 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12289 return(ctxt->errNo);
12290
    /* Second pass: advance past the bytes already pushed and go back to
     * push the part of the chunk that was held back. */
12291 if (remain != 0) {
12292 chunk += size;
12293 size = remain;
12294 remain = 0;
12295 goto xmldecl_done;
12296 }
    /* Now push the '\r' that was stripped from the end of the chunk. */
Daniel Veillarda617e242006-01-09 14:38:44 +000012297 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12298 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012299 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12300 ctxt->input);
12301 size_t current = ctxt->input->cur - ctxt->input->base;
12302
Daniel Veillarda617e242006-01-09 14:38:44 +000012303 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012304
12305 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12306 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012307 }
Owen Taylor3473f882001-02-23 17:55:21 +000012308 if (terminate) {
12309 /*
12310 * Check for termination
12311 */
        /* number of bytes left unparsed after the current position */
Daniel Veillard65686452012-07-19 18:25:01 +080012312 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012313
12314 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012315 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012316 cur_avail = ctxt->input->length -
12317 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012318 else
Daniel Veillard65686452012-07-19 18:25:01 +080012319 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12320 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012321 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012322
        /* Ending anywhere but in EOF or EPILOG state, or in EPILOG with
         * unparsed bytes left over, is reported as XML_ERR_DOCUMENT_END. */
Owen Taylor3473f882001-02-23 17:55:21 +000012323 if ((ctxt->instate != XML_PARSER_EOF) &&
12324 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012325 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012326 }
Daniel Veillard65686452012-07-19 18:25:01 +080012327 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012328 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012329 }
Owen Taylor3473f882001-02-23 17:55:21 +000012330 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012331 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012332 ctxt->sax->endDocument(ctxt->userData);
12333 }
12334 ctxt->instate = XML_PARSER_EOF;
12335 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012336 if (ctxt->wellFormed == 0)
12337 return((xmlParserErrors) ctxt->errNo);
12338 else
12339 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012340}
12341
12342/************************************************************************
12343 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012344 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012345 * *
12346 ************************************************************************/
12347
12348/**
Owen Taylor3473f882001-02-23 17:55:21 +000012349 * xmlCreatePushParserCtxt:
12350 * @sax: a SAX handler
12351 * @user_data: The user data returned on SAX callbacks
12352 * @chunk: a pointer to an array of chars
12353 * @size: number of chars in the array
12354 * @filename: an optional file name or URI
12355 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012356 * Create a parser context for using the XML parser in push mode.
12357 * If @buffer and @size are non-NULL, the data is used to detect
12358 * the encoding. The remaining characters will be parsed so they
12359 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012360 * To allow content encoding detection, @size should be >= 4
12361 * The value of @filename is used for fetching external entities
12362 * and error/warning reports.
12363 *
12364 * Returns the new parser context or NULL
12365 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012366
Owen Taylor3473f882001-02-23 17:55:21 +000012367xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012368xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012369 const char *chunk, int size, const char *filename) {
12370 xmlParserCtxtPtr ctxt;
12371 xmlParserInputPtr inputStream;
12372 xmlParserInputBufferPtr buf;
12373 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12374
12375 /*
12376 * plug some encoding conversion routines
12377 */
12378 if ((chunk != NULL) && (size >= 4))
12379 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12380
12381 buf = xmlAllocParserInputBuffer(enc);
12382 if (buf == NULL) return(NULL);
12383
12384 ctxt = xmlNewParserCtxt();
12385 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012386 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012387 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012388 return(NULL);
12389 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012390 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012391 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12392 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012393 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012394 xmlFreeParserInputBuffer(buf);
12395 xmlFreeParserCtxt(ctxt);
12396 return(NULL);
12397 }
Owen Taylor3473f882001-02-23 17:55:21 +000012398 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012399#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012400 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012401#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012402 xmlFree(ctxt->sax);
12403 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12404 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012405 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012406 xmlFreeParserInputBuffer(buf);
12407 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012408 return(NULL);
12409 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012410 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12411 if (sax->initialized == XML_SAX2_MAGIC)
12412 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12413 else
12414 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012415 if (user_data != NULL)
12416 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012417 }
Owen Taylor3473f882001-02-23 17:55:21 +000012418 if (filename == NULL) {
12419 ctxt->directory = NULL;
12420 } else {
12421 ctxt->directory = xmlParserGetDirectory(filename);
12422 }
12423
12424 inputStream = xmlNewInputStream(ctxt);
12425 if (inputStream == NULL) {
12426 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012427 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012428 return(NULL);
12429 }
12430
12431 if (filename == NULL)
12432 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012433 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012434 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012435 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012436 if (inputStream->filename == NULL) {
12437 xmlFreeParserCtxt(ctxt);
12438 xmlFreeParserInputBuffer(buf);
12439 return(NULL);
12440 }
12441 }
Owen Taylor3473f882001-02-23 17:55:21 +000012442 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012443 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012444 inputPush(ctxt, inputStream);
12445
William M. Brack3a1cd212005-02-11 14:35:54 +000012446 /*
12447 * If the caller didn't provide an initial 'chunk' for determining
12448 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12449 * that it can be automatically determined later
12450 */
12451 if ((size == 0) || (chunk == NULL)) {
12452 ctxt->charset = XML_CHAR_ENCODING_NONE;
12453 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012454 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12455 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012456
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012457 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012458
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012459 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012460#ifdef DEBUG_PUSH
12461 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12462#endif
12463 }
12464
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012465 if (enc != XML_CHAR_ENCODING_NONE) {
12466 xmlSwitchEncoding(ctxt, enc);
12467 }
12468
Owen Taylor3473f882001-02-23 17:55:21 +000012469 return(ctxt);
12470}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012471#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012472
12473/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012474 * xmlStopParser:
12475 * @ctxt: an XML parser context
12476 *
12477 * Blocks further parser processing
12478 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012479void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012480xmlStopParser(xmlParserCtxtPtr ctxt) {
12481 if (ctxt == NULL)
12482 return;
12483 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarde50ba812013-04-11 15:54:51 +080012484 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012485 ctxt->disableSAX = 1;
12486 if (ctxt->input != NULL) {
12487 ctxt->input->cur = BAD_CAST"";
12488 ctxt->input->base = ctxt->input->cur;
12489 }
12490}
12491
12492/**
Owen Taylor3473f882001-02-23 17:55:21 +000012493 * xmlCreateIOParserCtxt:
12494 * @sax: a SAX handler
12495 * @user_data: The user data returned on SAX callbacks
12496 * @ioread: an I/O read function
12497 * @ioclose: an I/O close function
12498 * @ioctx: an I/O handler
12499 * @enc: the charset encoding if known
12500 *
12501 * Create a parser context for using the XML parser with an existing
12502 * I/O stream
12503 *
12504 * Returns the new parser context or NULL
12505 */
12506xmlParserCtxtPtr
12507xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12508 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12509 void *ioctx, xmlCharEncoding enc) {
12510 xmlParserCtxtPtr ctxt;
12511 xmlParserInputPtr inputStream;
12512 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012513
Daniel Veillard42595322004-11-08 10:52:06 +000012514 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012515
12516 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012517 if (buf == NULL) {
12518 if (ioclose != NULL)
12519 ioclose(ioctx);
12520 return (NULL);
12521 }
Owen Taylor3473f882001-02-23 17:55:21 +000012522
12523 ctxt = xmlNewParserCtxt();
12524 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012525 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012526 return(NULL);
12527 }
12528 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012529#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012530 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012531#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012532 xmlFree(ctxt->sax);
12533 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12534 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012535 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012536 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012537 return(NULL);
12538 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012539 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12540 if (sax->initialized == XML_SAX2_MAGIC)
12541 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12542 else
12543 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012544 if (user_data != NULL)
12545 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012546 }
Owen Taylor3473f882001-02-23 17:55:21 +000012547
12548 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12549 if (inputStream == NULL) {
12550 xmlFreeParserCtxt(ctxt);
12551 return(NULL);
12552 }
12553 inputPush(ctxt, inputStream);
12554
12555 return(ctxt);
12556}
12557
Daniel Veillard4432df22003-09-28 18:58:27 +000012558#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012559/************************************************************************
12560 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012561 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012562 * *
12563 ************************************************************************/
12564
12565/**
12566 * xmlIOParseDTD:
12567 * @sax: the SAX handler block or NULL
12568 * @input: an Input Buffer
12569 * @enc: the charset encoding if known
12570 *
12571 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012572 *
Owen Taylor3473f882001-02-23 17:55:21 +000012573 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012574 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012575 */
12576
12577xmlDtdPtr
12578xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12579 xmlCharEncoding enc) {
12580 xmlDtdPtr ret = NULL;
12581 xmlParserCtxtPtr ctxt;
12582 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012583 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012584
12585 if (input == NULL)
12586 return(NULL);
12587
12588 ctxt = xmlNewParserCtxt();
12589 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012590 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012591 return(NULL);
12592 }
12593
12594 /*
12595 * Set-up the SAX context
12596 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012597 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012598 if (ctxt->sax != NULL)
12599 xmlFree(ctxt->sax);
12600 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012601 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012602 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012603 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012604
12605 /*
12606 * generate a parser input from the I/O handler
12607 */
12608
Daniel Veillard43caefb2003-12-07 19:32:22 +000012609 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012610 if (pinput == NULL) {
12611 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012612 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012613 xmlFreeParserCtxt(ctxt);
12614 return(NULL);
12615 }
12616
12617 /*
12618 * plug some encoding conversion routines here.
12619 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012620 if (xmlPushInput(ctxt, pinput) < 0) {
12621 if (sax != NULL) ctxt->sax = NULL;
12622 xmlFreeParserCtxt(ctxt);
12623 return(NULL);
12624 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012625 if (enc != XML_CHAR_ENCODING_NONE) {
12626 xmlSwitchEncoding(ctxt, enc);
12627 }
Owen Taylor3473f882001-02-23 17:55:21 +000012628
12629 pinput->filename = NULL;
12630 pinput->line = 1;
12631 pinput->col = 1;
12632 pinput->base = ctxt->input->cur;
12633 pinput->cur = ctxt->input->cur;
12634 pinput->free = NULL;
12635
12636 /*
12637 * let's parse that entity knowing it's an external subset.
12638 */
12639 ctxt->inSubset = 2;
12640 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012641 if (ctxt->myDoc == NULL) {
12642 xmlErrMemory(ctxt, "New Doc failed");
12643 return(NULL);
12644 }
12645 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012646 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12647 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012648
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012649 if ((enc == XML_CHAR_ENCODING_NONE) &&
12650 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012651 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012652 * Get the 4 first bytes and decode the charset
12653 * if enc != XML_CHAR_ENCODING_NONE
12654 * plug some encoding conversion routines.
12655 */
12656 start[0] = RAW;
12657 start[1] = NXT(1);
12658 start[2] = NXT(2);
12659 start[3] = NXT(3);
12660 enc = xmlDetectCharEncoding(start, 4);
12661 if (enc != XML_CHAR_ENCODING_NONE) {
12662 xmlSwitchEncoding(ctxt, enc);
12663 }
12664 }
12665
Owen Taylor3473f882001-02-23 17:55:21 +000012666 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12667
12668 if (ctxt->myDoc != NULL) {
12669 if (ctxt->wellFormed) {
12670 ret = ctxt->myDoc->extSubset;
12671 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012672 if (ret != NULL) {
12673 xmlNodePtr tmp;
12674
12675 ret->doc = NULL;
12676 tmp = ret->children;
12677 while (tmp != NULL) {
12678 tmp->doc = NULL;
12679 tmp = tmp->next;
12680 }
12681 }
Owen Taylor3473f882001-02-23 17:55:21 +000012682 } else {
12683 ret = NULL;
12684 }
12685 xmlFreeDoc(ctxt->myDoc);
12686 ctxt->myDoc = NULL;
12687 }
12688 if (sax != NULL) ctxt->sax = NULL;
12689 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012690
Owen Taylor3473f882001-02-23 17:55:21 +000012691 return(ret);
12692}
12693
12694/**
12695 * xmlSAXParseDTD:
12696 * @sax: the SAX handler block
12697 * @ExternalID: a NAME* containing the External ID of the DTD
12698 * @SystemID: a NAME* containing the URL to the DTD
12699 *
12700 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012701 *
Owen Taylor3473f882001-02-23 17:55:21 +000012702 * Returns the resulting xmlDtdPtr or NULL in case of error.
12703 */
12704
12705xmlDtdPtr
12706xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12707 const xmlChar *SystemID) {
12708 xmlDtdPtr ret = NULL;
12709 xmlParserCtxtPtr ctxt;
12710 xmlParserInputPtr input = NULL;
12711 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012712 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012713
12714 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12715
12716 ctxt = xmlNewParserCtxt();
12717 if (ctxt == NULL) {
12718 return(NULL);
12719 }
12720
12721 /*
12722 * Set-up the SAX context
12723 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012724 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012725 if (ctxt->sax != NULL)
12726 xmlFree(ctxt->sax);
12727 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012728 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012729 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012730
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012731 /*
12732 * Canonicalise the system ID
12733 */
12734 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012735 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012736 xmlFreeParserCtxt(ctxt);
12737 return(NULL);
12738 }
Owen Taylor3473f882001-02-23 17:55:21 +000012739
12740 /*
12741 * Ask the Entity resolver to load the damn thing
12742 */
12743
12744 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012745 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12746 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012747 if (input == NULL) {
12748 if (sax != NULL) ctxt->sax = NULL;
12749 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012750 if (systemIdCanonic != NULL)
12751 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012752 return(NULL);
12753 }
12754
12755 /*
12756 * plug some encoding conversion routines here.
12757 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012758 if (xmlPushInput(ctxt, input) < 0) {
12759 if (sax != NULL) ctxt->sax = NULL;
12760 xmlFreeParserCtxt(ctxt);
12761 if (systemIdCanonic != NULL)
12762 xmlFree(systemIdCanonic);
12763 return(NULL);
12764 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012765 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12766 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12767 xmlSwitchEncoding(ctxt, enc);
12768 }
Owen Taylor3473f882001-02-23 17:55:21 +000012769
12770 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012771 input->filename = (char *) systemIdCanonic;
12772 else
12773 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012774 input->line = 1;
12775 input->col = 1;
12776 input->base = ctxt->input->cur;
12777 input->cur = ctxt->input->cur;
12778 input->free = NULL;
12779
12780 /*
12781 * let's parse that entity knowing it's an external subset.
12782 */
12783 ctxt->inSubset = 2;
12784 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012785 if (ctxt->myDoc == NULL) {
12786 xmlErrMemory(ctxt, "New Doc failed");
12787 if (sax != NULL) ctxt->sax = NULL;
12788 xmlFreeParserCtxt(ctxt);
12789 return(NULL);
12790 }
12791 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012792 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12793 ExternalID, SystemID);
12794 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12795
12796 if (ctxt->myDoc != NULL) {
12797 if (ctxt->wellFormed) {
12798 ret = ctxt->myDoc->extSubset;
12799 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012800 if (ret != NULL) {
12801 xmlNodePtr tmp;
12802
12803 ret->doc = NULL;
12804 tmp = ret->children;
12805 while (tmp != NULL) {
12806 tmp->doc = NULL;
12807 tmp = tmp->next;
12808 }
12809 }
Owen Taylor3473f882001-02-23 17:55:21 +000012810 } else {
12811 ret = NULL;
12812 }
12813 xmlFreeDoc(ctxt->myDoc);
12814 ctxt->myDoc = NULL;
12815 }
12816 if (sax != NULL) ctxt->sax = NULL;
12817 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012818
Owen Taylor3473f882001-02-23 17:55:21 +000012819 return(ret);
12820}
12821
Daniel Veillard4432df22003-09-28 18:58:27 +000012822
Owen Taylor3473f882001-02-23 17:55:21 +000012823/**
12824 * xmlParseDTD:
12825 * @ExternalID: a NAME* containing the External ID of the DTD
12826 * @SystemID: a NAME* containing the URL to the DTD
12827 *
12828 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012829 *
Owen Taylor3473f882001-02-23 17:55:21 +000012830 * Returns the resulting xmlDtdPtr or NULL in case of error.
12831 */
12832
12833xmlDtdPtr
12834xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12835 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12836}
Daniel Veillard4432df22003-09-28 18:58:27 +000012837#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012838
12839/************************************************************************
12840 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012841 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012842 * *
12843 ************************************************************************/
12844
12845/**
Owen Taylor3473f882001-02-23 17:55:21 +000012846 * xmlParseCtxtExternalEntity:
12847 * @ctx: the existing parsing context
12848 * @URL: the URL for the entity to load
12849 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012850 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012851 *
12852 * Parse an external general entity within an existing parsing context
12853 * An external general parsed entity is well-formed if it matches the
12854 * production labeled extParsedEnt.
12855 *
12856 * [78] extParsedEnt ::= TextDecl? content
12857 *
12858 * Returns 0 if the entity is well formed, -1 in case of args problem and
12859 * the parser error code otherwise
12860 */
12861
12862int
12863xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012864 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012865 xmlParserCtxtPtr ctxt;
12866 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012867 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012868 xmlSAXHandlerPtr oldsax = NULL;
12869 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012870 xmlChar start[4];
12871 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012872
Daniel Veillardce682bc2004-11-05 17:22:25 +000012873 if (ctx == NULL) return(-1);
12874
Daniel Veillard0161e632008-08-28 15:36:32 +000012875 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12876 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012877 return(XML_ERR_ENTITY_LOOP);
12878 }
12879
Daniel Veillardcda96922001-08-21 10:56:31 +000012880 if (lst != NULL)
12881 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012882 if ((URL == NULL) && (ID == NULL))
12883 return(-1);
12884 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12885 return(-1);
12886
Rob Richards798743a2009-06-19 13:54:25 -040012887 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012888 if (ctxt == NULL) {
12889 return(-1);
12890 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012891
Owen Taylor3473f882001-02-23 17:55:21 +000012892 oldsax = ctxt->sax;
12893 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012894 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012895 newDoc = xmlNewDoc(BAD_CAST "1.0");
12896 if (newDoc == NULL) {
12897 xmlFreeParserCtxt(ctxt);
12898 return(-1);
12899 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012900 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012901 if (ctx->myDoc->dict) {
12902 newDoc->dict = ctx->myDoc->dict;
12903 xmlDictReference(newDoc->dict);
12904 }
Owen Taylor3473f882001-02-23 17:55:21 +000012905 if (ctx->myDoc != NULL) {
12906 newDoc->intSubset = ctx->myDoc->intSubset;
12907 newDoc->extSubset = ctx->myDoc->extSubset;
12908 }
12909 if (ctx->myDoc->URL != NULL) {
12910 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12911 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012912 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12913 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012914 ctxt->sax = oldsax;
12915 xmlFreeParserCtxt(ctxt);
12916 newDoc->intSubset = NULL;
12917 newDoc->extSubset = NULL;
12918 xmlFreeDoc(newDoc);
12919 return(-1);
12920 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012921 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012922 nodePush(ctxt, newDoc->children);
12923 if (ctx->myDoc == NULL) {
12924 ctxt->myDoc = newDoc;
12925 } else {
12926 ctxt->myDoc = ctx->myDoc;
12927 newDoc->children->doc = ctx->myDoc;
12928 }
12929
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012930 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012931 * Get the 4 first bytes and decode the charset
12932 * if enc != XML_CHAR_ENCODING_NONE
12933 * plug some encoding conversion routines.
12934 */
12935 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012936 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12937 start[0] = RAW;
12938 start[1] = NXT(1);
12939 start[2] = NXT(2);
12940 start[3] = NXT(3);
12941 enc = xmlDetectCharEncoding(start, 4);
12942 if (enc != XML_CHAR_ENCODING_NONE) {
12943 xmlSwitchEncoding(ctxt, enc);
12944 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012945 }
12946
Owen Taylor3473f882001-02-23 17:55:21 +000012947 /*
12948 * Parse a possible text declaration first
12949 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012950 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012951 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012952 /*
12953 * An XML-1.0 document can't reference an entity not XML-1.0
12954 */
12955 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12956 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012957 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012958 "Version mismatch between document and entity\n");
12959 }
Owen Taylor3473f882001-02-23 17:55:21 +000012960 }
12961
12962 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012963 * If the user provided its own SAX callbacks then reuse the
12964 * useData callback field, otherwise the expected setup in a
12965 * DOM builder is to have userData == ctxt
12966 */
12967 if (ctx->userData == ctx)
12968 ctxt->userData = ctxt;
12969 else
12970 ctxt->userData = ctx->userData;
12971
12972 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012973 * Doing validity checking on chunk doesn't make sense
12974 */
12975 ctxt->instate = XML_PARSER_CONTENT;
12976 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012977 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012978 ctxt->loadsubset = ctx->loadsubset;
12979 ctxt->depth = ctx->depth + 1;
12980 ctxt->replaceEntities = ctx->replaceEntities;
12981 if (ctxt->validate) {
12982 ctxt->vctxt.error = ctx->vctxt.error;
12983 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012984 } else {
12985 ctxt->vctxt.error = NULL;
12986 ctxt->vctxt.warning = NULL;
12987 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012988 ctxt->vctxt.nodeTab = NULL;
12989 ctxt->vctxt.nodeNr = 0;
12990 ctxt->vctxt.nodeMax = 0;
12991 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012992 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12993 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012994 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12995 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12996 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012997 ctxt->dictNames = ctx->dictNames;
12998 ctxt->attsDefault = ctx->attsDefault;
12999 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013000 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013001
13002 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013003
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013004 ctx->validate = ctxt->validate;
13005 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013006 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013007 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013008 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013009 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013010 }
13011 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013012 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013013 }
13014
13015 if (!ctxt->wellFormed) {
13016 if (ctxt->errNo == 0)
13017 ret = 1;
13018 else
13019 ret = ctxt->errNo;
13020 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013021 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013022 xmlNodePtr cur;
13023
13024 /*
13025 * Return the newly created nodeset after unlinking it from
13026 * they pseudo parent.
13027 */
13028 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013029 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013030 while (cur != NULL) {
13031 cur->parent = NULL;
13032 cur = cur->next;
13033 }
13034 newDoc->children->children = NULL;
13035 }
13036 ret = 0;
13037 }
13038 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013039 ctxt->dict = NULL;
13040 ctxt->attsDefault = NULL;
13041 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013042 xmlFreeParserCtxt(ctxt);
13043 newDoc->intSubset = NULL;
13044 newDoc->extSubset = NULL;
13045 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013046
Owen Taylor3473f882001-02-23 17:55:21 +000013047 return(ret);
13048}
13049
13050/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013051 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013052 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013053 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013054 * @sax: the SAX handler bloc (possibly NULL)
13055 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13056 * @depth: Used for loop detection, use 0
13057 * @URL: the URL for the entity to load
13058 * @ID: the System ID for the entity to load
13059 * @list: the return value for the set of parsed nodes
13060 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013061 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013062 *
13063 * Returns 0 if the entity is well formed, -1 in case of args problem and
13064 * the parser error code otherwise
13065 */
13066
Daniel Veillard7d515752003-09-26 19:12:37 +000013067static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013068xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13069 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013070 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013071 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013072 xmlParserCtxtPtr ctxt;
13073 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013074 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013075 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013076 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013077 xmlChar start[4];
13078 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013079
Daniel Veillard0161e632008-08-28 15:36:32 +000013080 if (((depth > 40) &&
13081 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13082 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013083 return(XML_ERR_ENTITY_LOOP);
13084 }
13085
Owen Taylor3473f882001-02-23 17:55:21 +000013086 if (list != NULL)
13087 *list = NULL;
13088 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013089 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013090 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013091 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013092
13093
Rob Richards9c0aa472009-03-26 18:10:19 +000013094 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013095 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013096 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013097 if (oldctxt != NULL) {
13098 ctxt->_private = oldctxt->_private;
13099 ctxt->loadsubset = oldctxt->loadsubset;
13100 ctxt->validate = oldctxt->validate;
13101 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013102 ctxt->record_info = oldctxt->record_info;
13103 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13104 ctxt->node_seq.length = oldctxt->node_seq.length;
13105 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013106 } else {
13107 /*
13108 * Doing validity checking on chunk without context
13109 * doesn't make sense
13110 */
13111 ctxt->_private = NULL;
13112 ctxt->validate = 0;
13113 ctxt->external = 2;
13114 ctxt->loadsubset = 0;
13115 }
Owen Taylor3473f882001-02-23 17:55:21 +000013116 if (sax != NULL) {
13117 oldsax = ctxt->sax;
13118 ctxt->sax = sax;
13119 if (user_data != NULL)
13120 ctxt->userData = user_data;
13121 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013122 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013123 newDoc = xmlNewDoc(BAD_CAST "1.0");
13124 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013125 ctxt->node_seq.maximum = 0;
13126 ctxt->node_seq.length = 0;
13127 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013128 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013129 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013130 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013131 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013132 newDoc->intSubset = doc->intSubset;
13133 newDoc->extSubset = doc->extSubset;
13134 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013135 xmlDictReference(newDoc->dict);
13136
Owen Taylor3473f882001-02-23 17:55:21 +000013137 if (doc->URL != NULL) {
13138 newDoc->URL = xmlStrdup(doc->URL);
13139 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013140 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13141 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013142 if (sax != NULL)
13143 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013144 ctxt->node_seq.maximum = 0;
13145 ctxt->node_seq.length = 0;
13146 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013147 xmlFreeParserCtxt(ctxt);
13148 newDoc->intSubset = NULL;
13149 newDoc->extSubset = NULL;
13150 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013151 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013152 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013153 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013154 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013155 ctxt->myDoc = doc;
13156 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013157
Daniel Veillard0161e632008-08-28 15:36:32 +000013158 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013159 * Get the 4 first bytes and decode the charset
13160 * if enc != XML_CHAR_ENCODING_NONE
13161 * plug some encoding conversion routines.
13162 */
13163 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013164 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13165 start[0] = RAW;
13166 start[1] = NXT(1);
13167 start[2] = NXT(2);
13168 start[3] = NXT(3);
13169 enc = xmlDetectCharEncoding(start, 4);
13170 if (enc != XML_CHAR_ENCODING_NONE) {
13171 xmlSwitchEncoding(ctxt, enc);
13172 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013173 }
13174
Owen Taylor3473f882001-02-23 17:55:21 +000013175 /*
13176 * Parse a possible text declaration first
13177 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013178 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013179 xmlParseTextDecl(ctxt);
13180 }
13181
Owen Taylor3473f882001-02-23 17:55:21 +000013182 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013183 ctxt->depth = depth;
13184
13185 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013186
Daniel Veillard561b7f82002-03-20 21:55:57 +000013187 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013188 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013189 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013190 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013191 }
13192 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013193 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013194 }
13195
13196 if (!ctxt->wellFormed) {
13197 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013198 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013199 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013200 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013201 } else {
13202 if (list != NULL) {
13203 xmlNodePtr cur;
13204
13205 /*
13206 * Return the newly created nodeset after unlinking it from
13207 * they pseudo parent.
13208 */
13209 cur = newDoc->children->children;
13210 *list = cur;
13211 while (cur != NULL) {
13212 cur->parent = NULL;
13213 cur = cur->next;
13214 }
13215 newDoc->children->children = NULL;
13216 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013217 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013218 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013219
13220 /*
13221 * Record in the parent context the number of entities replacement
13222 * done when parsing that reference.
13223 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013224 if (oldctxt != NULL)
13225 oldctxt->nbentities += ctxt->nbentities;
13226
Daniel Veillard0161e632008-08-28 15:36:32 +000013227 /*
13228 * Also record the size of the entity parsed
13229 */
13230 if (ctxt->input != NULL) {
13231 oldctxt->sizeentities += ctxt->input->consumed;
13232 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13233 }
13234 /*
13235 * And record the last error if any
13236 */
13237 if (ctxt->lastError.code != XML_ERR_OK)
13238 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13239
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013240 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013241 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013242 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13243 oldctxt->node_seq.length = ctxt->node_seq.length;
13244 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013245 ctxt->node_seq.maximum = 0;
13246 ctxt->node_seq.length = 0;
13247 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013248 xmlFreeParserCtxt(ctxt);
13249 newDoc->intSubset = NULL;
13250 newDoc->extSubset = NULL;
13251 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013252
Owen Taylor3473f882001-02-23 17:55:21 +000013253 return(ret);
13254}
13255
Daniel Veillard81273902003-09-30 00:43:48 +000013256#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013257/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013258 * xmlParseExternalEntity:
13259 * @doc: the document the chunk pertains to
13260 * @sax: the SAX handler bloc (possibly NULL)
13261 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13262 * @depth: Used for loop detection, use 0
13263 * @URL: the URL for the entity to load
13264 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013265 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013266 *
13267 * Parse an external general entity
13268 * An external general parsed entity is well-formed if it matches the
13269 * production labeled extParsedEnt.
13270 *
13271 * [78] extParsedEnt ::= TextDecl? content
13272 *
13273 * Returns 0 if the entity is well formed, -1 in case of args problem and
13274 * the parser error code otherwise
13275 */
13276
13277int
13278xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013279 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013280 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013281 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013282}
13283
13284/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013285 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013286 * @doc: the document the chunk pertains to
13287 * @sax: the SAX handler bloc (possibly NULL)
13288 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13289 * @depth: Used for loop detection, use 0
13290 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013291 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013292 *
13293 * Parse a well-balanced chunk of an XML document
13294 * called by the parser
13295 * The allowed sequence for the Well Balanced Chunk is the one defined by
13296 * the content production in the XML grammar:
13297 *
13298 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13299 *
13300 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13301 * the parser error code otherwise
13302 */
13303
13304int
13305xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013306 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013307 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13308 depth, string, lst, 0 );
13309}
Daniel Veillard81273902003-09-30 00:43:48 +000013310#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013311
13312/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013313 * xmlParseBalancedChunkMemoryInternal:
13314 * @oldctxt: the existing parsing context
13315 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13316 * @user_data: the user data field for the parser context
13317 * @lst: the return value for the set of parsed nodes
13318 *
13319 *
13320 * Parse a well-balanced chunk of an XML document
13321 * called by the parser
13322 * The allowed sequence for the Well Balanced Chunk is the one defined by
13323 * the content production in the XML grammar:
13324 *
13325 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13326 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013327 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13328 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013329 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013330 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013331 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013332 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013333static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013334xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13335 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13336 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013337 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013338 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013339 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013340 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013341 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013342 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013343 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013344#ifdef SAX2
13345 int i;
13346#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013347
Daniel Veillard0161e632008-08-28 15:36:32 +000013348 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13349 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013350 return(XML_ERR_ENTITY_LOOP);
13351 }
13352
13353
13354 if (lst != NULL)
13355 *lst = NULL;
13356 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013357 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013358
13359 size = xmlStrlen(string);
13360
13361 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013362 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013363 if (user_data != NULL)
13364 ctxt->userData = user_data;
13365 else
13366 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013367 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13368 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013369 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13370 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13371 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013372
Daniel Veillard74eaec12009-08-26 15:57:20 +020013373#ifdef SAX2
13374 /* propagate namespaces down the entity */
13375 for (i = 0;i < oldctxt->nsNr;i += 2) {
13376 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13377 }
13378#endif
13379
Daniel Veillard328f48c2002-11-15 15:24:34 +000013380 oldsax = ctxt->sax;
13381 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013382 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013383 ctxt->replaceEntities = oldctxt->replaceEntities;
13384 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013385
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013386 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013387 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013388 newDoc = xmlNewDoc(BAD_CAST "1.0");
13389 if (newDoc == NULL) {
13390 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013391 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013392 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013393 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013394 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013395 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013396 newDoc->dict = ctxt->dict;
13397 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013398 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013399 } else {
13400 ctxt->myDoc = oldctxt->myDoc;
13401 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013402 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013403 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013404 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13405 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013406 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013407 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013408 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013409 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013410 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013411 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013412 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013413 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013414 ctxt->myDoc->children = NULL;
13415 ctxt->myDoc->last = NULL;
13416 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013417 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013418 ctxt->instate = XML_PARSER_CONTENT;
13419 ctxt->depth = oldctxt->depth + 1;
13420
Daniel Veillard328f48c2002-11-15 15:24:34 +000013421 ctxt->validate = 0;
13422 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013423 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13424 /*
13425 * ID/IDREF registration will be done in xmlValidateElement below
13426 */
13427 ctxt->loadsubset |= XML_SKIP_IDS;
13428 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013429 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013430 ctxt->attsDefault = oldctxt->attsDefault;
13431 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013432
Daniel Veillard68e9e742002-11-16 15:35:11 +000013433 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013434 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013435 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013436 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013437 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013438 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013439 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013440 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013441 }
13442
13443 if (!ctxt->wellFormed) {
13444 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013445 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013446 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013447 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013448 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013449 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013450 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013451
William M. Brack7b9154b2003-09-27 19:23:50 +000013452 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013453 xmlNodePtr cur;
13454
13455 /*
13456 * Return the newly created nodeset after unlinking it from
13457 * they pseudo parent.
13458 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013459 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013460 *lst = cur;
13461 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013462#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013463 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13464 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13465 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013466 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13467 oldctxt->myDoc, cur);
13468 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013469#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013470 cur->parent = NULL;
13471 cur = cur->next;
13472 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013473 ctxt->myDoc->children->children = NULL;
13474 }
13475 if (ctxt->myDoc != NULL) {
13476 xmlFreeNode(ctxt->myDoc->children);
13477 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013478 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013479 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013480
13481 /*
13482 * Record in the parent context the number of entities replacement
13483 * done when parsing that reference.
13484 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013485 if (oldctxt != NULL)
13486 oldctxt->nbentities += ctxt->nbentities;
13487
Daniel Veillard0161e632008-08-28 15:36:32 +000013488 /*
13489 * Also record the last error if any
13490 */
13491 if (ctxt->lastError.code != XML_ERR_OK)
13492 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13493
Daniel Veillard328f48c2002-11-15 15:24:34 +000013494 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013495 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013496 ctxt->attsDefault = NULL;
13497 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013498 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013499 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013500 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013501 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013502
Daniel Veillard328f48c2002-11-15 15:24:34 +000013503 return(ret);
13504}
13505
Daniel Veillard29b17482004-08-16 00:39:03 +000013506/**
13507 * xmlParseInNodeContext:
13508 * @node: the context node
13509 * @data: the input string
13510 * @datalen: the input string length in bytes
13511 * @options: a combination of xmlParserOption
13512 * @lst: the return value for the set of parsed nodes
13513 *
13514 * Parse a well-balanced chunk of an XML document
13515 * within the context (DTD, namespaces, etc ...) of the given node.
13516 *
13517 * The allowed sequence for the data is a Well Balanced Chunk defined by
13518 * the content production in the XML grammar:
13519 *
13520 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13521 *
13522 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13523 * error code otherwise
13524 */
13525xmlParserErrors
13526xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13527 int options, xmlNodePtr *lst) {
13528#ifdef SAX2
13529 xmlParserCtxtPtr ctxt;
13530 xmlDocPtr doc = NULL;
13531 xmlNodePtr fake, cur;
13532 int nsnr = 0;
13533
13534 xmlParserErrors ret = XML_ERR_OK;
13535
13536 /*
13537 * check all input parameters, grab the document
13538 */
13539 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13540 return(XML_ERR_INTERNAL_ERROR);
13541 switch (node->type) {
13542 case XML_ELEMENT_NODE:
13543 case XML_ATTRIBUTE_NODE:
13544 case XML_TEXT_NODE:
13545 case XML_CDATA_SECTION_NODE:
13546 case XML_ENTITY_REF_NODE:
13547 case XML_PI_NODE:
13548 case XML_COMMENT_NODE:
13549 case XML_DOCUMENT_NODE:
13550 case XML_HTML_DOCUMENT_NODE:
13551 break;
13552 default:
13553 return(XML_ERR_INTERNAL_ERROR);
13554
13555 }
13556 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13557 (node->type != XML_DOCUMENT_NODE) &&
13558 (node->type != XML_HTML_DOCUMENT_NODE))
13559 node = node->parent;
13560 if (node == NULL)
13561 return(XML_ERR_INTERNAL_ERROR);
13562 if (node->type == XML_ELEMENT_NODE)
13563 doc = node->doc;
13564 else
13565 doc = (xmlDocPtr) node;
13566 if (doc == NULL)
13567 return(XML_ERR_INTERNAL_ERROR);
13568
13569 /*
13570 * allocate a context and set-up everything not related to the
13571 * node position in the tree
13572 */
13573 if (doc->type == XML_DOCUMENT_NODE)
13574 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13575#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013576 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013577 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013578 /*
13579 * When parsing in context, it makes no sense to add implied
13580 * elements like html/body/etc...
13581 */
13582 options |= HTML_PARSE_NOIMPLIED;
13583 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013584#endif
13585 else
13586 return(XML_ERR_INTERNAL_ERROR);
13587
13588 if (ctxt == NULL)
13589 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013590
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013591 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013592 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13593 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13594 * we must wait until the last moment to free the original one.
13595 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013596 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013597 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013598 xmlDictFree(ctxt->dict);
13599 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013600 } else
13601 options |= XML_PARSE_NODICT;
13602
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013603 if (doc->encoding != NULL) {
13604 xmlCharEncodingHandlerPtr hdlr;
13605
13606 if (ctxt->encoding != NULL)
13607 xmlFree((xmlChar *) ctxt->encoding);
13608 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13609
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013610 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013611 if (hdlr != NULL) {
13612 xmlSwitchToEncoding(ctxt, hdlr);
13613 } else {
13614 return(XML_ERR_UNSUPPORTED_ENCODING);
13615 }
13616 }
13617
Daniel Veillard37334572008-07-31 08:20:02 +000013618 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013619 xmlDetectSAX2(ctxt);
13620 ctxt->myDoc = doc;
13621
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013622 fake = xmlNewComment(NULL);
13623 if (fake == NULL) {
13624 xmlFreeParserCtxt(ctxt);
13625 return(XML_ERR_NO_MEMORY);
13626 }
13627 xmlAddChild(node, fake);
13628
Daniel Veillard29b17482004-08-16 00:39:03 +000013629 if (node->type == XML_ELEMENT_NODE) {
13630 nodePush(ctxt, node);
13631 /*
13632 * initialize the SAX2 namespaces stack
13633 */
13634 cur = node;
13635 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13636 xmlNsPtr ns = cur->nsDef;
13637 const xmlChar *iprefix, *ihref;
13638
13639 while (ns != NULL) {
13640 if (ctxt->dict) {
13641 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13642 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13643 } else {
13644 iprefix = ns->prefix;
13645 ihref = ns->href;
13646 }
13647
13648 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13649 nsPush(ctxt, iprefix, ihref);
13650 nsnr++;
13651 }
13652 ns = ns->next;
13653 }
13654 cur = cur->parent;
13655 }
13656 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013657 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013658
13659 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13660 /*
13661 * ID/IDREF registration will be done in xmlValidateElement below
13662 */
13663 ctxt->loadsubset |= XML_SKIP_IDS;
13664 }
13665
Daniel Veillard499cc922006-01-18 17:22:35 +000013666#ifdef LIBXML_HTML_ENABLED
13667 if (doc->type == XML_HTML_DOCUMENT_NODE)
13668 __htmlParseContent(ctxt);
13669 else
13670#endif
13671 xmlParseContent(ctxt);
13672
Daniel Veillard29b17482004-08-16 00:39:03 +000013673 nsPop(ctxt, nsnr);
13674 if ((RAW == '<') && (NXT(1) == '/')) {
13675 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13676 } else if (RAW != 0) {
13677 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13678 }
13679 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13680 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13681 ctxt->wellFormed = 0;
13682 }
13683
13684 if (!ctxt->wellFormed) {
13685 if (ctxt->errNo == 0)
13686 ret = XML_ERR_INTERNAL_ERROR;
13687 else
13688 ret = (xmlParserErrors)ctxt->errNo;
13689 } else {
13690 ret = XML_ERR_OK;
13691 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013692
Daniel Veillard29b17482004-08-16 00:39:03 +000013693 /*
13694 * Return the newly created nodeset after unlinking it from
13695 * the pseudo sibling.
13696 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013697
Daniel Veillard29b17482004-08-16 00:39:03 +000013698 cur = fake->next;
13699 fake->next = NULL;
13700 node->last = fake;
13701
13702 if (cur != NULL) {
13703 cur->prev = NULL;
13704 }
13705
13706 *lst = cur;
13707
13708 while (cur != NULL) {
13709 cur->parent = NULL;
13710 cur = cur->next;
13711 }
13712
13713 xmlUnlinkNode(fake);
13714 xmlFreeNode(fake);
13715
13716
13717 if (ret != XML_ERR_OK) {
13718 xmlFreeNodeList(*lst);
13719 *lst = NULL;
13720 }
William M. Brackc3f81342004-10-03 01:22:44 +000013721
William M. Brackb7b54de2004-10-06 16:38:01 +000013722 if (doc->dict != NULL)
13723 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013724 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013725
Daniel Veillard29b17482004-08-16 00:39:03 +000013726 return(ret);
13727#else /* !SAX2 */
13728 return(XML_ERR_INTERNAL_ERROR);
13729#endif
13730}
13731
Daniel Veillard81273902003-09-30 00:43:48 +000013732#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013733/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013734 * xmlParseBalancedChunkMemoryRecover:
13735 * @doc: the document the chunk pertains to
13736 * @sax: the SAX handler bloc (possibly NULL)
13737 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13738 * @depth: Used for loop detection, use 0
13739 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13740 * @lst: the return value for the set of parsed nodes
13741 * @recover: return nodes even if the data is broken (use 0)
13742 *
13743 *
13744 * Parse a well-balanced chunk of an XML document
13745 * called by the parser
13746 * The allowed sequence for the Well Balanced Chunk is the one defined by
13747 * the content production in the XML grammar:
13748 *
13749 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13750 *
13751 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13752 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013753 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013754 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013755 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13756 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013757 */
13758int
13759xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013760 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013761 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013762 xmlParserCtxtPtr ctxt;
13763 xmlDocPtr newDoc;
13764 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013765 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013766 int size;
13767 int ret = 0;
13768
Daniel Veillard0161e632008-08-28 15:36:32 +000013769 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013770 return(XML_ERR_ENTITY_LOOP);
13771 }
13772
13773
Daniel Veillardcda96922001-08-21 10:56:31 +000013774 if (lst != NULL)
13775 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013776 if (string == NULL)
13777 return(-1);
13778
13779 size = xmlStrlen(string);
13780
13781 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13782 if (ctxt == NULL) return(-1);
13783 ctxt->userData = ctxt;
13784 if (sax != NULL) {
13785 oldsax = ctxt->sax;
13786 ctxt->sax = sax;
13787 if (user_data != NULL)
13788 ctxt->userData = user_data;
13789 }
13790 newDoc = xmlNewDoc(BAD_CAST "1.0");
13791 if (newDoc == NULL) {
13792 xmlFreeParserCtxt(ctxt);
13793 return(-1);
13794 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013795 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013796 if ((doc != NULL) && (doc->dict != NULL)) {
13797 xmlDictFree(ctxt->dict);
13798 ctxt->dict = doc->dict;
13799 xmlDictReference(ctxt->dict);
13800 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13801 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13802 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13803 ctxt->dictNames = 1;
13804 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013805 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013806 }
Owen Taylor3473f882001-02-23 17:55:21 +000013807 if (doc != NULL) {
13808 newDoc->intSubset = doc->intSubset;
13809 newDoc->extSubset = doc->extSubset;
13810 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013811 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13812 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013813 if (sax != NULL)
13814 ctxt->sax = oldsax;
13815 xmlFreeParserCtxt(ctxt);
13816 newDoc->intSubset = NULL;
13817 newDoc->extSubset = NULL;
13818 xmlFreeDoc(newDoc);
13819 return(-1);
13820 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013821 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13822 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013823 if (doc == NULL) {
13824 ctxt->myDoc = newDoc;
13825 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013826 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013827 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013828 /* Ensure that doc has XML spec namespace */
13829 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13830 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013831 }
13832 ctxt->instate = XML_PARSER_CONTENT;
13833 ctxt->depth = depth;
13834
13835 /*
13836 * Doing validity checking on chunk doesn't make sense
13837 */
13838 ctxt->validate = 0;
13839 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013840 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013841
Daniel Veillardb39bc392002-10-26 19:29:51 +000013842 if ( doc != NULL ){
13843 content = doc->children;
13844 doc->children = NULL;
13845 xmlParseContent(ctxt);
13846 doc->children = content;
13847 }
13848 else {
13849 xmlParseContent(ctxt);
13850 }
Owen Taylor3473f882001-02-23 17:55:21 +000013851 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013852 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013853 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013854 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013855 }
13856 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013857 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013858 }
13859
13860 if (!ctxt->wellFormed) {
13861 if (ctxt->errNo == 0)
13862 ret = 1;
13863 else
13864 ret = ctxt->errNo;
13865 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013866 ret = 0;
13867 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013868
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013869 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13870 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013871
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013872 /*
13873 * Return the newly created nodeset after unlinking it from
13874 * they pseudo parent.
13875 */
13876 cur = newDoc->children->children;
13877 *lst = cur;
13878 while (cur != NULL) {
13879 xmlSetTreeDoc(cur, doc);
13880 cur->parent = NULL;
13881 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013882 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013883 newDoc->children->children = NULL;
13884 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013885
13886 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013887 ctxt->sax = oldsax;
13888 xmlFreeParserCtxt(ctxt);
13889 newDoc->intSubset = NULL;
13890 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013891 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013892 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013893
Owen Taylor3473f882001-02-23 17:55:21 +000013894 return(ret);
13895}
13896
13897/**
13898 * xmlSAXParseEntity:
13899 * @sax: the SAX handler block
13900 * @filename: the filename
13901 *
13902 * parse an XML external entity out of context and build a tree.
13903 * It use the given SAX function block to handle the parsing callback.
13904 * If sax is NULL, fallback to the default DOM tree building routines.
13905 *
13906 * [78] extParsedEnt ::= TextDecl? content
13907 *
13908 * This correspond to a "Well Balanced" chunk
13909 *
13910 * Returns the resulting document tree
13911 */
13912
13913xmlDocPtr
13914xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13915 xmlDocPtr ret;
13916 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013917
13918 ctxt = xmlCreateFileParserCtxt(filename);
13919 if (ctxt == NULL) {
13920 return(NULL);
13921 }
13922 if (sax != NULL) {
13923 if (ctxt->sax != NULL)
13924 xmlFree(ctxt->sax);
13925 ctxt->sax = sax;
13926 ctxt->userData = NULL;
13927 }
13928
Owen Taylor3473f882001-02-23 17:55:21 +000013929 xmlParseExtParsedEnt(ctxt);
13930
13931 if (ctxt->wellFormed)
13932 ret = ctxt->myDoc;
13933 else {
13934 ret = NULL;
13935 xmlFreeDoc(ctxt->myDoc);
13936 ctxt->myDoc = NULL;
13937 }
13938 if (sax != NULL)
13939 ctxt->sax = NULL;
13940 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013941
Owen Taylor3473f882001-02-23 17:55:21 +000013942 return(ret);
13943}
13944
13945/**
13946 * xmlParseEntity:
13947 * @filename: the filename
13948 *
13949 * parse an XML external entity out of context and build a tree.
13950 *
13951 * [78] extParsedEnt ::= TextDecl? content
13952 *
13953 * This correspond to a "Well Balanced" chunk
13954 *
13955 * Returns the resulting document tree
13956 */
13957
13958xmlDocPtr
13959xmlParseEntity(const char *filename) {
13960 return(xmlSAXParseEntity(NULL, filename));
13961}
Daniel Veillard81273902003-09-30 00:43:48 +000013962#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013963
13964/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013965 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013966 * @URL: the entity URL
13967 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013968 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013969 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013970 *
13971 * Create a parser context for an external entity
13972 * Automatic support for ZLIB/Compress compressed document is provided
13973 * by default if found at compile-time.
13974 *
13975 * Returns the new parser context or NULL
13976 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013977static xmlParserCtxtPtr
13978xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13979 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013980 xmlParserCtxtPtr ctxt;
13981 xmlParserInputPtr inputStream;
13982 char *directory = NULL;
13983 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013984
Owen Taylor3473f882001-02-23 17:55:21 +000013985 ctxt = xmlNewParserCtxt();
13986 if (ctxt == NULL) {
13987 return(NULL);
13988 }
13989
Daniel Veillard48247b42009-07-10 16:12:46 +020013990 if (pctx != NULL) {
13991 ctxt->options = pctx->options;
13992 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013993 }
13994
Owen Taylor3473f882001-02-23 17:55:21 +000013995 uri = xmlBuildURI(URL, base);
13996
13997 if (uri == NULL) {
13998 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13999 if (inputStream == NULL) {
14000 xmlFreeParserCtxt(ctxt);
14001 return(NULL);
14002 }
14003
14004 inputPush(ctxt, inputStream);
14005
14006 if ((ctxt->directory == NULL) && (directory == NULL))
14007 directory = xmlParserGetDirectory((char *)URL);
14008 if ((ctxt->directory == NULL) && (directory != NULL))
14009 ctxt->directory = directory;
14010 } else {
14011 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14012 if (inputStream == NULL) {
14013 xmlFree(uri);
14014 xmlFreeParserCtxt(ctxt);
14015 return(NULL);
14016 }
14017
14018 inputPush(ctxt, inputStream);
14019
14020 if ((ctxt->directory == NULL) && (directory == NULL))
14021 directory = xmlParserGetDirectory((char *)uri);
14022 if ((ctxt->directory == NULL) && (directory != NULL))
14023 ctxt->directory = directory;
14024 xmlFree(uri);
14025 }
Owen Taylor3473f882001-02-23 17:55:21 +000014026 return(ctxt);
14027}
14028
Rob Richards9c0aa472009-03-26 18:10:19 +000014029/**
14030 * xmlCreateEntityParserCtxt:
14031 * @URL: the entity URL
14032 * @ID: the entity PUBLIC ID
14033 * @base: a possible base for the target URI
14034 *
14035 * Create a parser context for an external entity
14036 * Automatic support for ZLIB/Compress compressed document is provided
14037 * by default if found at compile-time.
14038 *
14039 * Returns the new parser context or NULL
14040 */
14041xmlParserCtxtPtr
14042xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14043 const xmlChar *base) {
14044 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14045
14046}
14047
Owen Taylor3473f882001-02-23 17:55:21 +000014048/************************************************************************
14049 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014050 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014051 * *
14052 ************************************************************************/
14053
14054/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014055 * xmlCreateURLParserCtxt:
14056 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014057 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014058 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014059 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014060 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014061 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014062 *
14063 * Returns the new parser context or NULL
14064 */
14065xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014066xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014067{
14068 xmlParserCtxtPtr ctxt;
14069 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014070 char *directory = NULL;
14071
Owen Taylor3473f882001-02-23 17:55:21 +000014072 ctxt = xmlNewParserCtxt();
14073 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014074 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014075 return(NULL);
14076 }
14077
Daniel Veillarddf292f72005-01-16 19:00:15 +000014078 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014079 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014080 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014081
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014082 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014083 if (inputStream == NULL) {
14084 xmlFreeParserCtxt(ctxt);
14085 return(NULL);
14086 }
14087
Owen Taylor3473f882001-02-23 17:55:21 +000014088 inputPush(ctxt, inputStream);
14089 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014090 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014091 if ((ctxt->directory == NULL) && (directory != NULL))
14092 ctxt->directory = directory;
14093
14094 return(ctxt);
14095}
14096
Daniel Veillard61b93382003-11-03 14:28:31 +000014097/**
14098 * xmlCreateFileParserCtxt:
14099 * @filename: the filename
14100 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014101 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014102 * Automatic support for ZLIB/Compress compressed document is provided
14103 * by default if found at compile-time.
14104 *
14105 * Returns the new parser context or NULL
14106 */
14107xmlParserCtxtPtr
14108xmlCreateFileParserCtxt(const char *filename)
14109{
14110 return(xmlCreateURLParserCtxt(filename, 0));
14111}
14112
Daniel Veillard81273902003-09-30 00:43:48 +000014113#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014114/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014115 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014116 * @sax: the SAX handler block
14117 * @filename: the filename
14118 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14119 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014120 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014121 *
14122 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14123 * compressed document is provided by default if found at compile-time.
14124 * It use the given SAX function block to handle the parsing callback.
14125 * If sax is NULL, fallback to the default DOM tree building routines.
14126 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014127 * User data (void *) is stored within the parser context in the
14128 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014129 *
Owen Taylor3473f882001-02-23 17:55:21 +000014130 * Returns the resulting document tree
14131 */
14132
14133xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014134xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14135 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014136 xmlDocPtr ret;
14137 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014138
Daniel Veillard635ef722001-10-29 11:48:19 +000014139 xmlInitParser();
14140
Owen Taylor3473f882001-02-23 17:55:21 +000014141 ctxt = xmlCreateFileParserCtxt(filename);
14142 if (ctxt == NULL) {
14143 return(NULL);
14144 }
14145 if (sax != NULL) {
14146 if (ctxt->sax != NULL)
14147 xmlFree(ctxt->sax);
14148 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014149 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014150 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014151 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014152 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014153 }
Owen Taylor3473f882001-02-23 17:55:21 +000014154
Daniel Veillard37d2d162008-03-14 10:54:00 +000014155 if (ctxt->directory == NULL)
14156 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014157
Daniel Veillarddad3f682002-11-17 16:47:27 +000014158 ctxt->recovery = recovery;
14159
Owen Taylor3473f882001-02-23 17:55:21 +000014160 xmlParseDocument(ctxt);
14161
William M. Brackc07329e2003-09-08 01:57:30 +000014162 if ((ctxt->wellFormed) || recovery) {
14163 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014164 if (ret != NULL) {
14165 if (ctxt->input->buf->compressed > 0)
14166 ret->compression = 9;
14167 else
14168 ret->compression = ctxt->input->buf->compressed;
14169 }
William M. Brackc07329e2003-09-08 01:57:30 +000014170 }
Owen Taylor3473f882001-02-23 17:55:21 +000014171 else {
14172 ret = NULL;
14173 xmlFreeDoc(ctxt->myDoc);
14174 ctxt->myDoc = NULL;
14175 }
14176 if (sax != NULL)
14177 ctxt->sax = NULL;
14178 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014179
Owen Taylor3473f882001-02-23 17:55:21 +000014180 return(ret);
14181}
14182
14183/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014184 * xmlSAXParseFile:
14185 * @sax: the SAX handler block
14186 * @filename: the filename
14187 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14188 * documents
14189 *
14190 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14191 * compressed document is provided by default if found at compile-time.
14192 * It use the given SAX function block to handle the parsing callback.
14193 * If sax is NULL, fallback to the default DOM tree building routines.
14194 *
14195 * Returns the resulting document tree
14196 */
14197
14198xmlDocPtr
14199xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14200 int recovery) {
14201 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14202}
14203
14204/**
Owen Taylor3473f882001-02-23 17:55:21 +000014205 * xmlRecoverDoc:
14206 * @cur: a pointer to an array of xmlChar
14207 *
14208 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014209 * In the case the document is not Well Formed, a attempt to build a
14210 * tree is tried anyway
14211 *
14212 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014213 */
14214
14215xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014216xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014217 return(xmlSAXParseDoc(NULL, cur, 1));
14218}
14219
14220/**
14221 * xmlParseFile:
14222 * @filename: the filename
14223 *
14224 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14225 * compressed document is provided by default if found at compile-time.
14226 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014227 * Returns the resulting document tree if the file was wellformed,
14228 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014229 */
14230
14231xmlDocPtr
14232xmlParseFile(const char *filename) {
14233 return(xmlSAXParseFile(NULL, filename, 0));
14234}
14235
14236/**
14237 * xmlRecoverFile:
14238 * @filename: the filename
14239 *
14240 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14241 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014242 * In the case the document is not Well Formed, it attempts to build
14243 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014244 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014245 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014246 */
14247
14248xmlDocPtr
14249xmlRecoverFile(const char *filename) {
14250 return(xmlSAXParseFile(NULL, filename, 1));
14251}
14252
14253
14254/**
14255 * xmlSetupParserForBuffer:
14256 * @ctxt: an XML parser context
14257 * @buffer: a xmlChar * buffer
14258 * @filename: a file name
14259 *
14260 * Setup the parser context to parse a new buffer; Clears any prior
14261 * contents from the parser context. The buffer parameter must not be
14262 * NULL, but the filename parameter can be
14263 */
14264void
14265xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14266 const char* filename)
14267{
14268 xmlParserInputPtr input;
14269
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014270 if ((ctxt == NULL) || (buffer == NULL))
14271 return;
14272
Owen Taylor3473f882001-02-23 17:55:21 +000014273 input = xmlNewInputStream(ctxt);
14274 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014275 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014276 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014277 return;
14278 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014279
Owen Taylor3473f882001-02-23 17:55:21 +000014280 xmlClearParserCtxt(ctxt);
14281 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014282 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014283 input->base = buffer;
14284 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014285 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014286 inputPush(ctxt, input);
14287}
14288
14289/**
14290 * xmlSAXUserParseFile:
14291 * @sax: a SAX handler
14292 * @user_data: The user data returned on SAX callbacks
14293 * @filename: a file name
14294 *
14295 * parse an XML file and call the given SAX handler routines.
14296 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014297 *
Owen Taylor3473f882001-02-23 17:55:21 +000014298 * Returns 0 in case of success or a error number otherwise
14299 */
14300int
14301xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14302 const char *filename) {
14303 int ret = 0;
14304 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014305
Owen Taylor3473f882001-02-23 17:55:21 +000014306 ctxt = xmlCreateFileParserCtxt(filename);
14307 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014308 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014309 xmlFree(ctxt->sax);
14310 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014311 xmlDetectSAX2(ctxt);
14312
Owen Taylor3473f882001-02-23 17:55:21 +000014313 if (user_data != NULL)
14314 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014315
Owen Taylor3473f882001-02-23 17:55:21 +000014316 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014317
Owen Taylor3473f882001-02-23 17:55:21 +000014318 if (ctxt->wellFormed)
14319 ret = 0;
14320 else {
14321 if (ctxt->errNo != 0)
14322 ret = ctxt->errNo;
14323 else
14324 ret = -1;
14325 }
14326 if (sax != NULL)
14327 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014328 if (ctxt->myDoc != NULL) {
14329 xmlFreeDoc(ctxt->myDoc);
14330 ctxt->myDoc = NULL;
14331 }
Owen Taylor3473f882001-02-23 17:55:21 +000014332 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014333
Owen Taylor3473f882001-02-23 17:55:21 +000014334 return ret;
14335}
Daniel Veillard81273902003-09-30 00:43:48 +000014336#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014337
14338/************************************************************************
14339 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014340 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014341 * *
14342 ************************************************************************/
14343
14344/**
14345 * xmlCreateMemoryParserCtxt:
14346 * @buffer: a pointer to a char array
14347 * @size: the size of the array
14348 *
14349 * Create a parser context for an XML in-memory document.
14350 *
14351 * Returns the new parser context or NULL
14352 */
14353xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014354xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014355 xmlParserCtxtPtr ctxt;
14356 xmlParserInputPtr input;
14357 xmlParserInputBufferPtr buf;
14358
14359 if (buffer == NULL)
14360 return(NULL);
14361 if (size <= 0)
14362 return(NULL);
14363
14364 ctxt = xmlNewParserCtxt();
14365 if (ctxt == NULL)
14366 return(NULL);
14367
Daniel Veillard53350552003-09-18 13:35:51 +000014368 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014369 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014370 if (buf == NULL) {
14371 xmlFreeParserCtxt(ctxt);
14372 return(NULL);
14373 }
Owen Taylor3473f882001-02-23 17:55:21 +000014374
14375 input = xmlNewInputStream(ctxt);
14376 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014377 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014378 xmlFreeParserCtxt(ctxt);
14379 return(NULL);
14380 }
14381
14382 input->filename = NULL;
14383 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014384 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014385
14386 inputPush(ctxt, input);
14387 return(ctxt);
14388}
14389
Daniel Veillard81273902003-09-30 00:43:48 +000014390#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014391/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014392 * xmlSAXParseMemoryWithData:
14393 * @sax: the SAX handler block
14394 * @buffer: an pointer to a char array
14395 * @size: the size of the array
14396 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14397 * documents
14398 * @data: the userdata
14399 *
14400 * parse an XML in-memory block and use the given SAX function block
14401 * to handle the parsing callback. If sax is NULL, fallback to the default
14402 * DOM tree building routines.
14403 *
14404 * User data (void *) is stored within the parser context in the
14405 * context's _private member, so it is available nearly everywhere in libxml
14406 *
14407 * Returns the resulting document tree
14408 */
14409
14410xmlDocPtr
14411xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14412 int size, int recovery, void *data) {
14413 xmlDocPtr ret;
14414 xmlParserCtxtPtr ctxt;
14415
Daniel Veillardab2a7632009-07-09 08:45:03 +020014416 xmlInitParser();
14417
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014418 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14419 if (ctxt == NULL) return(NULL);
14420 if (sax != NULL) {
14421 if (ctxt->sax != NULL)
14422 xmlFree(ctxt->sax);
14423 ctxt->sax = sax;
14424 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014425 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014426 if (data!=NULL) {
14427 ctxt->_private=data;
14428 }
14429
Daniel Veillardadba5f12003-04-04 16:09:01 +000014430 ctxt->recovery = recovery;
14431
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014432 xmlParseDocument(ctxt);
14433
14434 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14435 else {
14436 ret = NULL;
14437 xmlFreeDoc(ctxt->myDoc);
14438 ctxt->myDoc = NULL;
14439 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014440 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014441 ctxt->sax = NULL;
14442 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014443
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014444 return(ret);
14445}
14446
14447/**
Owen Taylor3473f882001-02-23 17:55:21 +000014448 * xmlSAXParseMemory:
14449 * @sax: the SAX handler block
14450 * @buffer: an pointer to a char array
14451 * @size: the size of the array
14452 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14453 * documents
14454 *
14455 * parse an XML in-memory block and use the given SAX function block
14456 * to handle the parsing callback. If sax is NULL, fallback to the default
14457 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014458 *
Owen Taylor3473f882001-02-23 17:55:21 +000014459 * Returns the resulting document tree
14460 */
14461xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014462xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14463 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014464 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014465}
14466
14467/**
14468 * xmlParseMemory:
14469 * @buffer: an pointer to a char array
14470 * @size: the size of the array
14471 *
14472 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014473 *
Owen Taylor3473f882001-02-23 17:55:21 +000014474 * Returns the resulting document tree
14475 */
14476
Daniel Veillard50822cb2001-07-26 20:05:51 +000014477xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014478 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14479}
14480
14481/**
14482 * xmlRecoverMemory:
14483 * @buffer: an pointer to a char array
14484 * @size: the size of the array
14485 *
14486 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014487 * In the case the document is not Well Formed, an attempt to
14488 * build a tree is tried anyway
14489 *
14490 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014491 */
14492
Daniel Veillard50822cb2001-07-26 20:05:51 +000014493xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014494 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14495}
14496
14497/**
14498 * xmlSAXUserParseMemory:
14499 * @sax: a SAX handler
14500 * @user_data: The user data returned on SAX callbacks
14501 * @buffer: an in-memory XML document input
14502 * @size: the length of the XML document in bytes
14503 *
14504 * A better SAX parsing routine.
14505 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014506 *
Owen Taylor3473f882001-02-23 17:55:21 +000014507 * Returns 0 in case of success or a error number otherwise
14508 */
14509int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014510 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014511 int ret = 0;
14512 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014513
14514 xmlInitParser();
14515
Owen Taylor3473f882001-02-23 17:55:21 +000014516 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14517 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014518 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14519 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014520 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014521 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014522
Daniel Veillard30211a02001-04-26 09:33:18 +000014523 if (user_data != NULL)
14524 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014525
Owen Taylor3473f882001-02-23 17:55:21 +000014526 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014527
Owen Taylor3473f882001-02-23 17:55:21 +000014528 if (ctxt->wellFormed)
14529 ret = 0;
14530 else {
14531 if (ctxt->errNo != 0)
14532 ret = ctxt->errNo;
14533 else
14534 ret = -1;
14535 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014536 if (sax != NULL)
14537 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014538 if (ctxt->myDoc != NULL) {
14539 xmlFreeDoc(ctxt->myDoc);
14540 ctxt->myDoc = NULL;
14541 }
Owen Taylor3473f882001-02-23 17:55:21 +000014542 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014543
Owen Taylor3473f882001-02-23 17:55:21 +000014544 return ret;
14545}
Daniel Veillard81273902003-09-30 00:43:48 +000014546#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014547
14548/**
14549 * xmlCreateDocParserCtxt:
14550 * @cur: a pointer to an array of xmlChar
14551 *
14552 * Creates a parser context for an XML in-memory document.
14553 *
14554 * Returns the new parser context or NULL
14555 */
14556xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014557xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014558 int len;
14559
14560 if (cur == NULL)
14561 return(NULL);
14562 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014563 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014564}
14565
Daniel Veillard81273902003-09-30 00:43:48 +000014566#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014567/**
14568 * xmlSAXParseDoc:
14569 * @sax: the SAX handler block
14570 * @cur: a pointer to an array of xmlChar
14571 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14572 * documents
14573 *
14574 * parse an XML in-memory document and build a tree.
14575 * It use the given SAX function block to handle the parsing callback.
14576 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014577 *
Owen Taylor3473f882001-02-23 17:55:21 +000014578 * Returns the resulting document tree
14579 */
14580
14581xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014582xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014583 xmlDocPtr ret;
14584 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014585 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014586
Daniel Veillard38936062004-11-04 17:45:11 +000014587 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014588
14589
14590 ctxt = xmlCreateDocParserCtxt(cur);
14591 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014592 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014593 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014594 ctxt->sax = sax;
14595 ctxt->userData = NULL;
14596 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014597 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014598
14599 xmlParseDocument(ctxt);
14600 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14601 else {
14602 ret = NULL;
14603 xmlFreeDoc(ctxt->myDoc);
14604 ctxt->myDoc = NULL;
14605 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014606 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014607 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014608 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014609
Owen Taylor3473f882001-02-23 17:55:21 +000014610 return(ret);
14611}
14612
14613/**
14614 * xmlParseDoc:
14615 * @cur: a pointer to an array of xmlChar
14616 *
14617 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014618 *
Owen Taylor3473f882001-02-23 17:55:21 +000014619 * Returns the resulting document tree
14620 */
14621
14622xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014623xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014624 return(xmlSAXParseDoc(NULL, cur, 0));
14625}
Daniel Veillard81273902003-09-30 00:43:48 +000014626#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014627
Daniel Veillard81273902003-09-30 00:43:48 +000014628#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014629/************************************************************************
14630 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014631 * Specific function to keep track of entities references *
14632 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014633 * *
14634 ************************************************************************/
14635
14636static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14637
14638/**
14639 * xmlAddEntityReference:
14640 * @ent : A valid entity
14641 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014642 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014643 *
14644 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14645 */
14646static void
14647xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14648 xmlNodePtr lastNode)
14649{
14650 if (xmlEntityRefFunc != NULL) {
14651 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14652 }
14653}
14654
14655
14656/**
14657 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014658 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014659 *
14660 * Set the function to call call back when a xml reference has been made
14661 */
14662void
14663xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14664{
14665 xmlEntityRefFunc = func;
14666}
Daniel Veillard81273902003-09-30 00:43:48 +000014667#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014668
14669/************************************************************************
14670 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014671 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014672 * *
14673 ************************************************************************/
14674
14675#ifdef LIBXML_XPATH_ENABLED
14676#include <libxml/xpath.h>
14677#endif
14678
Daniel Veillardffa3c742005-07-21 13:24:09 +000014679extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014680static int xmlParserInitialized = 0;
14681
14682/**
14683 * xmlInitParser:
14684 *
14685 * Initialization function for the XML parser.
14686 * This is not reentrant. Call once before processing in case of
14687 * use in multithreaded programs.
14688 */
14689
14690void
14691xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014692 if (xmlParserInitialized != 0)
14693 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014694
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014695#ifdef LIBXML_THREAD_ENABLED
14696 __xmlGlobalInitMutexLock();
14697 if (xmlParserInitialized == 0) {
14698#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014699 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014700 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014701 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14702 (xmlGenericError == NULL))
14703 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014704 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014705 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014706 xmlInitCharEncodingHandlers();
14707 xmlDefaultSAXHandlerInit();
14708 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014709#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014710 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014711#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014712#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014713 htmlInitAutoClose();
14714 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014715#endif
14716#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014717 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014718#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014719 xmlParserInitialized = 1;
14720#ifdef LIBXML_THREAD_ENABLED
14721 }
14722 __xmlGlobalInitMutexUnlock();
14723#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014724}
14725
14726/**
14727 * xmlCleanupParser:
14728 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014729 * This function name is somewhat misleading. It does not clean up
14730 * parser state, it cleans up memory allocated by the library itself.
14731 * It is a cleanup function for the XML library. It tries to reclaim all
14732 * related global memory allocated for the library processing.
14733 * It doesn't deallocate any document related memory. One should
14734 * call xmlCleanupParser() only when the process has finished using
14735 * the library and all XML/HTML documents built with it.
14736 * See also xmlInitParser() which has the opposite function of preparing
14737 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014738 *
14739 * WARNING: if your application is multithreaded or has plugin support
14740 * calling this may crash the application if another thread or
14741 * a plugin is still using libxml2. It's sometimes very hard to
14742 * guess if libxml2 is in use in the application, some libraries
14743 * or plugins may use it without notice. In case of doubt abstain
14744 * from calling this function or do it just before calling exit()
14745 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014746 */
14747
14748void
14749xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014750 if (!xmlParserInitialized)
14751 return;
14752
Owen Taylor3473f882001-02-23 17:55:21 +000014753 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014754#ifdef LIBXML_CATALOG_ENABLED
14755 xmlCatalogCleanup();
14756#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014757 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014758 xmlCleanupInputCallbacks();
14759#ifdef LIBXML_OUTPUT_ENABLED
14760 xmlCleanupOutputCallbacks();
14761#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014762#ifdef LIBXML_SCHEMAS_ENABLED
14763 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014764 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014765#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014766 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014767 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014768 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014769 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014770 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014771}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014772
14773/************************************************************************
14774 * *
14775 * New set (2.6.0) of simpler and more flexible APIs *
14776 * *
14777 ************************************************************************/
14778
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. Nothing is done when @str is NULL. When "dict" is NULL
 * the string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement (safe inside an unbraced if/else); callers
 * terminate it with a semicolon. Note that @str is evaluated more than
 * once.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14790
14791/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014792 * xmlCtxtReset:
14793 * @ctxt: an XML parser context
14794 *
14795 * Reset a parser context
14796 */
14797void
14798xmlCtxtReset(xmlParserCtxtPtr ctxt)
14799{
14800 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014801 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014802
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014803 if (ctxt == NULL)
14804 return;
14805
14806 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014807
14808 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14809 xmlFreeInputStream(input);
14810 }
14811 ctxt->inputNr = 0;
14812 ctxt->input = NULL;
14813
14814 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014815 if (ctxt->spaceTab != NULL) {
14816 ctxt->spaceTab[0] = -1;
14817 ctxt->space = &ctxt->spaceTab[0];
14818 } else {
14819 ctxt->space = NULL;
14820 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014821
14822
14823 ctxt->nodeNr = 0;
14824 ctxt->node = NULL;
14825
14826 ctxt->nameNr = 0;
14827 ctxt->name = NULL;
14828
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014829 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014830 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014831 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014832 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014833 DICT_FREE(ctxt->directory);
14834 ctxt->directory = NULL;
14835 DICT_FREE(ctxt->extSubURI);
14836 ctxt->extSubURI = NULL;
14837 DICT_FREE(ctxt->extSubSystem);
14838 ctxt->extSubSystem = NULL;
14839 if (ctxt->myDoc != NULL)
14840 xmlFreeDoc(ctxt->myDoc);
14841 ctxt->myDoc = NULL;
14842
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014843 ctxt->standalone = -1;
14844 ctxt->hasExternalSubset = 0;
14845 ctxt->hasPErefs = 0;
14846 ctxt->html = 0;
14847 ctxt->external = 0;
14848 ctxt->instate = XML_PARSER_START;
14849 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014850
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014851 ctxt->wellFormed = 1;
14852 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014853 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014854 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014855#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014856 ctxt->vctxt.userData = ctxt;
14857 ctxt->vctxt.error = xmlParserValidityError;
14858 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014859#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014860 ctxt->record_info = 0;
14861 ctxt->nbChars = 0;
14862 ctxt->checkIndex = 0;
14863 ctxt->inSubset = 0;
14864 ctxt->errNo = XML_ERR_OK;
14865 ctxt->depth = 0;
14866 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14867 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014868 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014869 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014870 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014871 xmlInitNodeInfoSeq(&ctxt->node_seq);
14872
14873 if (ctxt->attsDefault != NULL) {
14874 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14875 ctxt->attsDefault = NULL;
14876 }
14877 if (ctxt->attsSpecial != NULL) {
14878 xmlHashFree(ctxt->attsSpecial, NULL);
14879 ctxt->attsSpecial = NULL;
14880 }
14881
Daniel Veillard4432df22003-09-28 18:58:27 +000014882#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014883 if (ctxt->catalogs != NULL)
14884 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014885#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014886 if (ctxt->lastError.code != XML_ERR_OK)
14887 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014888}
14889
14890/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014891 * xmlCtxtResetPush:
14892 * @ctxt: an XML parser context
14893 * @chunk: a pointer to an array of chars
14894 * @size: number of chars in the array
14895 * @filename: an optional file name or URI
14896 * @encoding: the document encoding, or NULL
14897 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014898 * Reset a push parser context
14899 *
14900 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014901 */
14902int
14903xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14904 int size, const char *filename, const char *encoding)
14905{
14906 xmlParserInputPtr inputStream;
14907 xmlParserInputBufferPtr buf;
14908 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14909
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014910 if (ctxt == NULL)
14911 return(1);
14912
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014913 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14914 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14915
14916 buf = xmlAllocParserInputBuffer(enc);
14917 if (buf == NULL)
14918 return(1);
14919
14920 if (ctxt == NULL) {
14921 xmlFreeParserInputBuffer(buf);
14922 return(1);
14923 }
14924
14925 xmlCtxtReset(ctxt);
14926
14927 if (ctxt->pushTab == NULL) {
14928 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14929 sizeof(xmlChar *));
14930 if (ctxt->pushTab == NULL) {
14931 xmlErrMemory(ctxt, NULL);
14932 xmlFreeParserInputBuffer(buf);
14933 return(1);
14934 }
14935 }
14936
14937 if (filename == NULL) {
14938 ctxt->directory = NULL;
14939 } else {
14940 ctxt->directory = xmlParserGetDirectory(filename);
14941 }
14942
14943 inputStream = xmlNewInputStream(ctxt);
14944 if (inputStream == NULL) {
14945 xmlFreeParserInputBuffer(buf);
14946 return(1);
14947 }
14948
14949 if (filename == NULL)
14950 inputStream->filename = NULL;
14951 else
14952 inputStream->filename = (char *)
14953 xmlCanonicPath((const xmlChar *) filename);
14954 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014955 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014956
14957 inputPush(ctxt, inputStream);
14958
14959 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14960 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014961 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14962 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014963
14964 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14965
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014966 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014967#ifdef DEBUG_PUSH
14968 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14969#endif
14970 }
14971
14972 if (encoding != NULL) {
14973 xmlCharEncodingHandlerPtr hdlr;
14974
Daniel Veillard37334572008-07-31 08:20:02 +000014975 if (ctxt->encoding != NULL)
14976 xmlFree((xmlChar *) ctxt->encoding);
14977 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14978
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014979 hdlr = xmlFindCharEncodingHandler(encoding);
14980 if (hdlr != NULL) {
14981 xmlSwitchToEncoding(ctxt, hdlr);
14982 } else {
14983 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14984 "Unsupported encoding %s\n", BAD_CAST encoding);
14985 }
14986 } else if (enc != XML_CHAR_ENCODING_NONE) {
14987 xmlSwitchEncoding(ctxt, enc);
14988 }
14989
14990 return(0);
14991}
14992
Daniel Veillard37334572008-07-31 08:20:02 +000014993
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014994/**
Daniel Veillard37334572008-07-31 08:20:02 +000014995 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014996 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014997 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014998 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014999 *
15000 * Applies the options to the parser context
15001 *
15002 * Returns 0 in case of success, the set of unknown or unimplemented options
15003 * in case of error.
15004 */
Daniel Veillard37334572008-07-31 08:20:02 +000015005static int
15006xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015007{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015008 if (ctxt == NULL)
15009 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015010 if (encoding != NULL) {
15011 if (ctxt->encoding != NULL)
15012 xmlFree((xmlChar *) ctxt->encoding);
15013 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15014 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015015 if (options & XML_PARSE_RECOVER) {
15016 ctxt->recovery = 1;
15017 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015018 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015019 } else
15020 ctxt->recovery = 0;
15021 if (options & XML_PARSE_DTDLOAD) {
15022 ctxt->loadsubset = XML_DETECT_IDS;
15023 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015024 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015025 } else
15026 ctxt->loadsubset = 0;
15027 if (options & XML_PARSE_DTDATTR) {
15028 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15029 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015030 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015031 }
15032 if (options & XML_PARSE_NOENT) {
15033 ctxt->replaceEntities = 1;
15034 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15035 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015036 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015037 } else
15038 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015039 if (options & XML_PARSE_PEDANTIC) {
15040 ctxt->pedantic = 1;
15041 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015042 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 } else
15044 ctxt->pedantic = 0;
15045 if (options & XML_PARSE_NOBLANKS) {
15046 ctxt->keepBlanks = 0;
15047 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15048 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015049 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015050 } else
15051 ctxt->keepBlanks = 1;
15052 if (options & XML_PARSE_DTDVALID) {
15053 ctxt->validate = 1;
15054 if (options & XML_PARSE_NOWARNING)
15055 ctxt->vctxt.warning = NULL;
15056 if (options & XML_PARSE_NOERROR)
15057 ctxt->vctxt.error = NULL;
15058 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015059 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015060 } else
15061 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015062 if (options & XML_PARSE_NOWARNING) {
15063 ctxt->sax->warning = NULL;
15064 options -= XML_PARSE_NOWARNING;
15065 }
15066 if (options & XML_PARSE_NOERROR) {
15067 ctxt->sax->error = NULL;
15068 ctxt->sax->fatalError = NULL;
15069 options -= XML_PARSE_NOERROR;
15070 }
Daniel Veillard81273902003-09-30 00:43:48 +000015071#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015072 if (options & XML_PARSE_SAX1) {
15073 ctxt->sax->startElement = xmlSAX2StartElement;
15074 ctxt->sax->endElement = xmlSAX2EndElement;
15075 ctxt->sax->startElementNs = NULL;
15076 ctxt->sax->endElementNs = NULL;
15077 ctxt->sax->initialized = 1;
15078 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015079 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015080 }
Daniel Veillard81273902003-09-30 00:43:48 +000015081#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015082 if (options & XML_PARSE_NODICT) {
15083 ctxt->dictNames = 0;
15084 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015085 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015086 } else {
15087 ctxt->dictNames = 1;
15088 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015089 if (options & XML_PARSE_NOCDATA) {
15090 ctxt->sax->cdataBlock = NULL;
15091 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015092 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015093 }
15094 if (options & XML_PARSE_NSCLEAN) {
15095 ctxt->options |= XML_PARSE_NSCLEAN;
15096 options -= XML_PARSE_NSCLEAN;
15097 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015098 if (options & XML_PARSE_NONET) {
15099 ctxt->options |= XML_PARSE_NONET;
15100 options -= XML_PARSE_NONET;
15101 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015102 if (options & XML_PARSE_COMPACT) {
15103 ctxt->options |= XML_PARSE_COMPACT;
15104 options -= XML_PARSE_COMPACT;
15105 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015106 if (options & XML_PARSE_OLD10) {
15107 ctxt->options |= XML_PARSE_OLD10;
15108 options -= XML_PARSE_OLD10;
15109 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015110 if (options & XML_PARSE_NOBASEFIX) {
15111 ctxt->options |= XML_PARSE_NOBASEFIX;
15112 options -= XML_PARSE_NOBASEFIX;
15113 }
15114 if (options & XML_PARSE_HUGE) {
15115 ctxt->options |= XML_PARSE_HUGE;
15116 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015117 if (ctxt->dict != NULL)
15118 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015119 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015120 if (options & XML_PARSE_OLDSAX) {
15121 ctxt->options |= XML_PARSE_OLDSAX;
15122 options -= XML_PARSE_OLDSAX;
15123 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015124 if (options & XML_PARSE_IGNORE_ENC) {
15125 ctxt->options |= XML_PARSE_IGNORE_ENC;
15126 options -= XML_PARSE_IGNORE_ENC;
15127 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015128 if (options & XML_PARSE_BIG_LINES) {
15129 ctxt->options |= XML_PARSE_BIG_LINES;
15130 options -= XML_PARSE_BIG_LINES;
15131 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015132 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015133 return (options);
15134}
15135
15136/**
Daniel Veillard37334572008-07-31 08:20:02 +000015137 * xmlCtxtUseOptions:
15138 * @ctxt: an XML parser context
15139 * @options: a combination of xmlParserOption
15140 *
15141 * Applies the options to the parser context
15142 *
15143 * Returns 0 in case of success, the set of unknown or unimplemented options
15144 * in case of error.
15145 */
15146int
15147xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15148{
15149 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15150}
15151
15152/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015153 * xmlDoRead:
15154 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015155 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015156 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015157 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015158 * @reuse: keep the context for reuse
15159 *
15160 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015161 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015162 * Returns the resulting document tree or NULL
15163 */
15164static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015165xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15166 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015167{
15168 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015169
15170 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015171 if (encoding != NULL) {
15172 xmlCharEncodingHandlerPtr hdlr;
15173
15174 hdlr = xmlFindCharEncodingHandler(encoding);
15175 if (hdlr != NULL)
15176 xmlSwitchToEncoding(ctxt, hdlr);
15177 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015178 if ((URL != NULL) && (ctxt->input != NULL) &&
15179 (ctxt->input->filename == NULL))
15180 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015181 xmlParseDocument(ctxt);
15182 if ((ctxt->wellFormed) || ctxt->recovery)
15183 ret = ctxt->myDoc;
15184 else {
15185 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015186 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015187 xmlFreeDoc(ctxt->myDoc);
15188 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015189 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015190 ctxt->myDoc = NULL;
15191 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015192 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015193 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015194
15195 return (ret);
15196}
15197
15198/**
15199 * xmlReadDoc:
15200 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015201 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015202 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015203 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015204 *
15205 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015206 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015207 * Returns the resulting document tree
15208 */
15209xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015210xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015211{
15212 xmlParserCtxtPtr ctxt;
15213
15214 if (cur == NULL)
15215 return (NULL);
15216
15217 ctxt = xmlCreateDocParserCtxt(cur);
15218 if (ctxt == NULL)
15219 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015220 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015221}
15222
15223/**
15224 * xmlReadFile:
15225 * @filename: a file or URL
15226 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015227 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015228 *
15229 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015230 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015231 * Returns the resulting document tree
15232 */
15233xmlDocPtr
15234xmlReadFile(const char *filename, const char *encoding, int options)
15235{
15236 xmlParserCtxtPtr ctxt;
15237
Daniel Veillard61b93382003-11-03 14:28:31 +000015238 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015239 if (ctxt == NULL)
15240 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015241 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015242}
15243
15244/**
15245 * xmlReadMemory:
15246 * @buffer: a pointer to a char array
15247 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015248 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015249 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015250 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015251 *
15252 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015253 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015254 * Returns the resulting document tree
15255 */
15256xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015257xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015258{
15259 xmlParserCtxtPtr ctxt;
15260
15261 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15262 if (ctxt == NULL)
15263 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015264 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015265}
15266
15267/**
15268 * xmlReadFd:
15269 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015270 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015271 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015272 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015273 *
15274 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015275 * NOTE that the file descriptor will not be closed when the
15276 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015277 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015278 * Returns the resulting document tree
15279 */
15280xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015281xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015282{
15283 xmlParserCtxtPtr ctxt;
15284 xmlParserInputBufferPtr input;
15285 xmlParserInputPtr stream;
15286
15287 if (fd < 0)
15288 return (NULL);
15289
15290 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15291 if (input == NULL)
15292 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015293 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015294 ctxt = xmlNewParserCtxt();
15295 if (ctxt == NULL) {
15296 xmlFreeParserInputBuffer(input);
15297 return (NULL);
15298 }
15299 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15300 if (stream == NULL) {
15301 xmlFreeParserInputBuffer(input);
15302 xmlFreeParserCtxt(ctxt);
15303 return (NULL);
15304 }
15305 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015306 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015307}
15308
15309/**
15310 * xmlReadIO:
15311 * @ioread: an I/O read function
15312 * @ioclose: an I/O close function
15313 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015314 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015315 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015316 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015317 *
15318 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015319 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015320 * Returns the resulting document tree
15321 */
15322xmlDocPtr
15323xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015324 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015325{
15326 xmlParserCtxtPtr ctxt;
15327 xmlParserInputBufferPtr input;
15328 xmlParserInputPtr stream;
15329
15330 if (ioread == NULL)
15331 return (NULL);
15332
15333 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15334 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015335 if (input == NULL) {
15336 if (ioclose != NULL)
15337 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015338 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015339 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015340 ctxt = xmlNewParserCtxt();
15341 if (ctxt == NULL) {
15342 xmlFreeParserInputBuffer(input);
15343 return (NULL);
15344 }
15345 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15346 if (stream == NULL) {
15347 xmlFreeParserInputBuffer(input);
15348 xmlFreeParserCtxt(ctxt);
15349 return (NULL);
15350 }
15351 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015352 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015353}
15354
15355/**
15356 * xmlCtxtReadDoc:
15357 * @ctxt: an XML parser context
15358 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015359 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015360 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015361 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015362 *
15363 * parse an XML in-memory document and build a tree.
15364 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015365 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015366 * Returns the resulting document tree
15367 */
15368xmlDocPtr
15369xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015370 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015371{
15372 xmlParserInputPtr stream;
15373
15374 if (cur == NULL)
15375 return (NULL);
15376 if (ctxt == NULL)
15377 return (NULL);
15378
15379 xmlCtxtReset(ctxt);
15380
15381 stream = xmlNewStringInputStream(ctxt, cur);
15382 if (stream == NULL) {
15383 return (NULL);
15384 }
15385 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015386 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015387}
15388
15389/**
15390 * xmlCtxtReadFile:
15391 * @ctxt: an XML parser context
15392 * @filename: a file or URL
15393 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015394 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015395 *
15396 * parse an XML file from the filesystem or the network.
15397 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015398 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015399 * Returns the resulting document tree
15400 */
15401xmlDocPtr
15402xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15403 const char *encoding, int options)
15404{
15405 xmlParserInputPtr stream;
15406
15407 if (filename == NULL)
15408 return (NULL);
15409 if (ctxt == NULL)
15410 return (NULL);
15411
15412 xmlCtxtReset(ctxt);
15413
Daniel Veillard29614c72004-11-26 10:47:26 +000015414 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015415 if (stream == NULL) {
15416 return (NULL);
15417 }
15418 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015419 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420}
15421
15422/**
15423 * xmlCtxtReadMemory:
15424 * @ctxt: an XML parser context
15425 * @buffer: a pointer to a char array
15426 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015427 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015429 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015430 *
15431 * parse an XML in-memory document and build a tree.
15432 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015433 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015434 * Returns the resulting document tree
15435 */
15436xmlDocPtr
15437xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015438 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015439{
15440 xmlParserInputBufferPtr input;
15441 xmlParserInputPtr stream;
15442
15443 if (ctxt == NULL)
15444 return (NULL);
15445 if (buffer == NULL)
15446 return (NULL);
15447
15448 xmlCtxtReset(ctxt);
15449
15450 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15451 if (input == NULL) {
15452 return(NULL);
15453 }
15454
15455 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15456 if (stream == NULL) {
15457 xmlFreeParserInputBuffer(input);
15458 return(NULL);
15459 }
15460
15461 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015462 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015463}
15464
15465/**
15466 * xmlCtxtReadFd:
15467 * @ctxt: an XML parser context
15468 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015469 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015470 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015471 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015472 *
15473 * parse an XML from a file descriptor and build a tree.
15474 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015475 * NOTE that the file descriptor will not be closed when the
15476 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015477 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015478 * Returns the resulting document tree
15479 */
15480xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015481xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15482 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015483{
15484 xmlParserInputBufferPtr input;
15485 xmlParserInputPtr stream;
15486
15487 if (fd < 0)
15488 return (NULL);
15489 if (ctxt == NULL)
15490 return (NULL);
15491
15492 xmlCtxtReset(ctxt);
15493
15494
15495 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15496 if (input == NULL)
15497 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015498 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015499 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15500 if (stream == NULL) {
15501 xmlFreeParserInputBuffer(input);
15502 return (NULL);
15503 }
15504 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015505 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015506}
15507
15508/**
15509 * xmlCtxtReadIO:
15510 * @ctxt: an XML parser context
15511 * @ioread: an I/O read function
15512 * @ioclose: an I/O close function
15513 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015514 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015515 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015516 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015517 *
15518 * parse an XML document from I/O functions and source and build a tree.
15519 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015520 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015521 * Returns the resulting document tree
15522 */
15523xmlDocPtr
15524xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15525 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015526 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015527 const char *encoding, int options)
15528{
15529 xmlParserInputBufferPtr input;
15530 xmlParserInputPtr stream;
15531
15532 if (ioread == NULL)
15533 return (NULL);
15534 if (ctxt == NULL)
15535 return (NULL);
15536
15537 xmlCtxtReset(ctxt);
15538
15539 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15540 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015541 if (input == NULL) {
15542 if (ioclose != NULL)
15543 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015544 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015545 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015546 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15547 if (stream == NULL) {
15548 xmlFreeParserInputBuffer(input);
15549 return (NULL);
15550 }
15551 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015552 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015553}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015554
15555#define bottom_parser
15556#include "elfgcchack.h"