blob: ba757eb123a5e5ac77fe7a8d0fb0253b3e467a0d [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080020 * different ranges of character are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
103#define XML_PARSER_BIG_ENTITY 1000
104#define XML_PARSER_LOT_ENTITY 5000
105
106/*
107 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 108 * replacement over the size in bytes of the input indicates that you have
 109 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 110 * replacements per byte of input.
111 */
112#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800125 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
135 return(0);
136
137 /*
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
141 */
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
145 }
146 consumed += ctxt->sizeentities;
147
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
149 return(0);
150 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000151 /*
152 * Do the check based on the replacement size of the entity
153 */
154 if (size < XML_PARSER_BIG_ENTITY)
155 return(0);
156
157 /*
158 * A limit on the amount of text data reasonably used
159 */
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
163 }
164 consumed += ctxt->sizeentities;
165
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168 return (0);
169 } else if (ent != NULL) {
170 /*
171 * use the number of parsed entities in the replacement
172 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800173 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000174
175 /*
176 * The amount of data parsed counting entities size only once
177 */
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
181 }
182 consumed += ctxt->sizeentities;
183
184 /*
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
187 */
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189 return (0);
190 } else {
191 /*
192 * strange we got no data for checking just return
193 */
194 return (0);
195 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 return (1);
198}
199
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000200/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000201 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000203 * arbitrary depth limit for the XML documents that we allow to
204 * process. This is not a limitation of the parser but a safety
205 * boundary feature. It can be disabled with the XML_PARSE_HUGE
206 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000207 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000208unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard0fb18932003-09-07 09:14:37 +0000210
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000213#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000214#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000215#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
216
Daniel Veillard1f972e92012-08-15 10:16:37 +0800217/**
218 * XML_PARSER_CHUNK_SIZE
219 *
 220 * When calling GROW, this is the minimal amount of data
 221 * the parser expects to have received. It is not a hard
 222 * limit but an optimization when reading strings like Names.
 223 * It is not strictly needed as long as the input's available characters
 224 * are followed by 0, which should be provided by the I/O level.
225 */
226#define XML_PARSER_CHUNK_SIZE 100
227
Owen Taylor3473f882001-02-23 17:55:21 +0000228/*
Owen Taylor3473f882001-02-23 17:55:21 +0000229 * List of XML prefixed PI allowed by W3C specs
230 */
231
Daniel Veillardb44025c2001-10-11 22:55:55 +0000232static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000233 "xml-stylesheet",
Daniel Veillard4c4653e2011-06-05 11:29:29 +0800234 "xml-model",
Owen Taylor3473f882001-02-23 17:55:21 +0000235 NULL
236};
237
Daniel Veillarda07050d2003-10-19 14:46:32 +0000238
Owen Taylor3473f882001-02-23 17:55:21 +0000239/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200240static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000242
Daniel Veillard7d515752003-09-26 19:12:37 +0000243static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000244xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000246 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000247 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000248
Daniel Veillard37334572008-07-31 08:20:02 +0000249static int
250xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000252#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000253static void
254xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000256#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000257
Daniel Veillard7d515752003-09-26 19:12:37 +0000258static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000259xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000261
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000262static int
263xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264
Daniel Veillarde57ec792003-09-10 10:50:59 +0000265/************************************************************************
266 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800267 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 * *
269 ************************************************************************/
270
271/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
276 *
277 * Handle a redefinition of attribute error
278 */
279static void
280xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
282{
Daniel Veillard157fee02003-10-31 10:36:03 +0000283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
285 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000286 if (ctxt != NULL)
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000289 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000294 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000300 if (ctxt != NULL) {
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
304 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305}
306
307/**
308 * xmlFatalErr:
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @extra: extra information string
312 *
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314 */
315static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317{
318 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800319 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320
Daniel Veillard157fee02003-10-31 10:36:03 +0000321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
323 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 switch (error) {
325 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800326 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800329 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800332 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "internal error";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800338 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800341 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800344 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800350 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800353 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800356 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "Fragment not allowed";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
441 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000492 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800495 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 if (info == NULL)
507 snprintf(errstr, 128, "%s\n", errmsg);
508 else
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000510 if (ctxt != NULL)
511 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000514 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL) {
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
519 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520}
521
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000522/**
523 * xmlFatalErrMsg:
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
527 *
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529 */
530static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000533{
Daniel Veillard157fee02003-10-31 10:36:03 +0000534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
536 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000537 if (ctxt != NULL)
538 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000546}
547
548/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000549 * xmlWarningMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 * @str2: extra data
555 *
556 * Handle a warning.
557 */
558static void
559xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000563
Daniel Veillard157fee02003-10-31 10:36:03 +0000564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000569 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200570 if (ctxt != NULL) {
571 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000572 (ctxt->sax) ? ctxt->sax->warning : NULL,
573 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200578 } else {
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
584 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585}
586
587/**
588 * xmlValidityError:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
592 * @str1: extra data
593 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000594 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595 */
596static void
597xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000598 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000599{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000600 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000601
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->errNo = error;
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
609 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200610 if (ctxt != NULL) {
611 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000612 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200618 } else {
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000624 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625}
626
627/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000628 * xmlFatalErrMsgInt:
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
633 *
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635 */
636static void
637xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000638 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000639{
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if (ctxt != NULL)
644 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000645 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000648 if (ctxt != NULL) {
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
652 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000653}
654
655/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: an string info
661 * @val: an integer value
662 * @str2: an string info
663 *
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 */
666static void
667xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800668 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000669 const xmlChar *str2)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000680 if (ctxt != NULL) {
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
684 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000685}
686
687/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000688 * xmlFatalErrMsgStr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
693 *
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 */
696static void
697xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000698 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000699{
Daniel Veillard157fee02003-10-31 10:36:03 +0000700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
702 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
713 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000714}
715
716/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000717 * xmlErrMsgStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
722 *
723 * Handle a non fatal parser error
724 */
725static void
726xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
728{
Daniel Veillard157fee02003-10-31 10:36:03 +0000729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
731 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000732 if (ctxt != NULL)
733 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737 val);
738}
739
740/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000741 * xmlNsErr:
742 * @ctxt: an XML parser context
743 * @error: the error number
744 * @msg: the message
745 * @info1: extra information string
746 * @info2: extra information string
747 *
748 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749 */
750static void
751xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000755{
Daniel Veillard157fee02003-10-31 10:36:03 +0000756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
758 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000759 if (ctxt != NULL)
760 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000765 if (ctxt != NULL)
766 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000767}
768
Daniel Veillard37334572008-07-31 08:20:02 +0000769/**
770 * xmlNsWarn
771 * @ctxt: an XML parser context
772 * @error: the error number
773 * @msg: the message
774 * @info1: extra information string
775 * @info2: extra information string
776 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800777 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000778 */
779static void
780xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const char *msg,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
784{
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
787 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
792}
793
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000794/************************************************************************
795 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800796 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797 * *
798 ************************************************************************/
799
800/**
801 * xmlHasFeature:
802 * @feature: the feature to be examined
803 *
804 * Examines if the library has been compiled with a given feature.
805 *
806 * Returns a non-zero value if the feature exist, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an unknown
808 * unknown feature is requested, non-zero otherwise.
809 */
810int
811xmlHasFeature(xmlFeature feature)
812{
813 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_THREAD_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_TREE_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_OUTPUT_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_PUSH_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_READER_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_PATTERN_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_WRITER_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_SAX1_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_FTP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_HTTP_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_VALID_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_HTML_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_LEGACY_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_C14N_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_CATALOG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_XPATH_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_XPTR_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_XINCLUDE_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_ICONV_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_ISO8859X_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_UNICODE_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_REGEXP_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_AUTOMATA_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_EXPR_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_SCHEMAS_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_SCHEMATRON_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_MODULES_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_DEBUG_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef DEBUG_MEMORY_LOCATION
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_DEBUG_RUNTIME
990 return(1);
991#else
992 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000993#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000994 case XML_WITH_ZLIB:
995#ifdef LIBXML_ZLIB_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001000 case XML_WITH_LZMA:
1001#ifdef LIBXML_LZMA_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 case XML_WITH_ICU:
1007#ifdef LIBXML_ICU_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012 default:
1013 break;
1014 }
1015 return(0);
1016}
1017
1018/************************************************************************
1019 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001020 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 * *
1022 ************************************************************************/
1023
1024/**
1025 * xmlDetectSAX2:
1026 * @ctxt: an XML parser context
1027 *
1028 * Do the SAX2 detection and specific intialization
1029 */
1030static void
1031xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001037#else
1038 ctxt->sax2 = 1;
1039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001046 xmlErrMemory(ctxt, NULL);
1047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048}
1049
/*
 * Table of the defaulted attributes declared for one element.
 * The structure is allocated with extra room behind it so that
 * values[] can hold 5 consecutive entries per attribute.
 */
typedef struct _xmlDefAttrs {
    /* number of defaulted attributes on that element */
    int nbAttrs;
    /* the size of the array, counted in attributes */
    int maxAttrs;
    /* array of localname/prefix/values/external */
    const xmlChar *values[5];
} xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057
/**
 * xmlAttrNormalizeSpace:
 * @src:  the source string
 * @dst:  the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst need to be at least src, and if one doesn't need
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
 * passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 *         if an argument is NULL or if the normalization was done in
 *         place (@src == @dst) and left the string unchanged.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /*
     * Write through a separate cursor so that the start of the
     * normalized string can still be returned to the caller.
     */
    out = dst;

    /* discard the leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse the run; drop it entirely if it is trailing */
            while (*src == 0x20) src++;
            if (*src != 0)
                *out++ = 0x20;
        } else {
            *out++ = *src++;
        }
    }
    *out = 0;
    /*
     * Both cursors can only meet at the end when working in place and
     * nothing was removed, i.e. no conversion was needed.
     */
    if (out == src)
        return(NULL);
    return(dst);
}
1096
1097/**
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1100 *
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1104 *
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106 * is needed.
1107 */
1108static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001109xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001110{
1111 int i;
1112 int remove_head = 0;
1113 int need_realloc = 0;
1114 const xmlChar *cur;
1115
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117 return(NULL);
1118 i = *len;
1119 if (i <= 0)
1120 return(NULL);
1121
1122 cur = src;
1123 while (*cur == 0x20) {
1124 cur++;
1125 remove_head++;
1126 }
1127 while (*cur != 0) {
1128 if (*cur == 0x20) {
1129 cur++;
1130 if ((*cur == 0x20) || (*cur == 0)) {
1131 need_realloc = 1;
1132 break;
1133 }
1134 } else
1135 cur++;
1136 }
1137 if (need_realloc) {
1138 xmlChar *ret;
1139
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141 if (ret == NULL) {
1142 xmlErrMemory(ctxt, NULL);
1143 return(NULL);
1144 }
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1147 return(ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 memmove(src, src + remove_head, 1 + *len);
1151 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001152 }
1153 return(NULL);
1154}
1155
1156/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 * xmlAddDefAttrs:
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1162 *
1163 * Add a defaulted attribute for an element
1164 */
1165static void
1166xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1171 int len;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1174
Daniel Veillard6a31b832008-03-26 14:06:44 +00001175 /*
1176 * Allows to detect attribute redefinitions
1177 */
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180 return;
1181 }
1182
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 if (ctxt->attsDefault == NULL)
1186 goto mem_error;
1187 }
1188
1189 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 */
1193 name = xmlSplitQName3(fullname, &len);
1194 if (name == NULL) {
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1196 prefix = NULL;
1197 } else {
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200 }
1201
1202 /*
1203 * make sure there is some storage
1204 */
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 if (defaults == NULL)
1210 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001212 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1215 xmlFree(defaults);
1216 goto mem_error;
1217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001219 xmlDefAttrsPtr temp;
1220
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001223 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1229 xmlFree(defaults);
1230 goto mem_error;
1231 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 }
1233
1234 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001235 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001236 * are within the DTD and hen not associated to namespace names.
1237 */
1238 name = xmlSplitQName3(fullattr, &len);
1239 if (name == NULL) {
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241 prefix = NULL;
1242 } else {
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245 }
1246
Daniel Veillardae0765b2008-07-31 19:54:59 +00001247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254 if (ctxt->external)
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256 else
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 defaults->nbAttrs++;
1259
1260 return;
1261
1262mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001263 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 return;
1265}
1266
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001267/**
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1273 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001275 */
1276static void
1277xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1280 int type)
1281{
1282 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001284 if (ctxt->attsSpecial == NULL)
1285 goto mem_error;
1286 }
1287
Daniel Veillardac4118d2008-01-11 05:27:32 +00001288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289 return;
1290
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001293 return;
1294
1295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001297 return;
1298}
1299
Daniel Veillard4432df22003-09-28 18:58:27 +00001300/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001301 * xmlCleanSpecialAttrCallback:
1302 *
1303 * Removes CDATA attributes from the special attribute table
1304 */
1305static void
1306xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313 }
1314}
1315
1316/**
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1319 *
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1323 */
1324static void
1325xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326{
1327 if (ctxt->attsSpecial == NULL)
1328 return;
1329
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1335 }
1336 return;
1337}
1338
1339/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1342 *
1343 * Checks that the value conforms to the LanguageID production:
1344 *
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1347 *
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1354 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1356 *
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1359 * ["-" script]
1360 * ["-" region]
1361 * *("-" variant)
1362 * *("-" extension)
1363 * ["-" privateuse]
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1369 *
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1372 *
1373 * script = 4ALPHA ; ISO 15924 code
1374 *
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1377 *
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1380 *
1381 * extension = singleton 1*("-" (2*8alphanum))
1382 *
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1386 * / %x41-57 ; A - W
1387 * / %x59-5A ; Y - Z
1388 * / %x61-77 ; a - w
1389 * / %x79-7A ; y - z
1390 *
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1394 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001395 * Returns 1 if correct 0 otherwise
1396 **/
1397int
1398xmlCheckLanguageID(const xmlChar * lang)
1399{
Daniel Veillard60587d62010-11-04 15:16:27 +01001400 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001401
1402 if (cur == NULL)
1403 return (0);
1404 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001405 ((cur[0] == 'I') && (cur[1] == '-')) ||
1406 ((cur[0] == 'x') && (cur[1] == '-')) ||
1407 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001408 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001409 * Still allow IANA code and user code which were coming
1410 * from the previous version of the XML-1.0 specification
1411 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001412 */
1413 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001415 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001417 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001418 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001419 nxt = cur;
1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422 nxt++;
1423 if (nxt - cur >= 4) {
1424 /*
1425 * Reserved
1426 */
1427 if ((nxt - cur > 8) || (nxt[0] != 0))
1428 return(0);
1429 return(1);
1430 }
1431 if (nxt - cur < 2)
1432 return(0);
1433 /* we got an ISO 639 code */
1434 if (nxt[0] == 0)
1435 return(1);
1436 if (nxt[0] != '-')
1437 return(0);
1438
1439 nxt++;
1440 cur = nxt;
1441 /* now we can have extlang or script or region or variant */
1442 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443 goto region_m49;
1444
1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447 nxt++;
1448 if (nxt - cur == 4)
1449 goto script;
1450 if (nxt - cur == 2)
1451 goto region;
1452 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453 goto variant;
1454 if (nxt - cur != 3)
1455 return(0);
1456 /* we parsed an extlang */
1457 if (nxt[0] == 0)
1458 return(1);
1459 if (nxt[0] != '-')
1460 return(0);
1461
1462 nxt++;
1463 cur = nxt;
1464 /* now we can have script or region or variant */
1465 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466 goto region_m49;
1467
1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470 nxt++;
1471 if (nxt - cur == 2)
1472 goto region;
1473 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474 goto variant;
1475 if (nxt - cur != 4)
1476 return(0);
1477 /* we parsed a script */
1478script:
1479 if (nxt[0] == 0)
1480 return(1);
1481 if (nxt[0] != '-')
1482 return(0);
1483
1484 nxt++;
1485 cur = nxt;
1486 /* now we can have region or variant */
1487 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488 goto region_m49;
1489
1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492 nxt++;
1493
1494 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495 goto variant;
1496 if (nxt - cur != 2)
1497 return(0);
1498 /* we parsed a region */
1499region:
1500 if (nxt[0] == 0)
1501 return(1);
1502 if (nxt[0] != '-')
1503 return(0);
1504
1505 nxt++;
1506 cur = nxt;
1507 /* now we can just have a variant */
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511
1512 if ((nxt - cur < 5) || (nxt - cur > 8))
1513 return(0);
1514
1515 /* we parsed a variant */
1516variant:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001522 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001523
1524region_m49:
1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527 nxt += 3;
1528 goto region;
1529 }
1530 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001531}
1532
/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/
1538
Daniel Veillard8ed10722009-08-20 19:17:36 +02001539static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001541
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542#ifdef SAX2
1543/**
1544 * nsPush:
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1548 *
1549 * Pushes a new parser namespace on top of the ns stack
1550 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 */
1554static int
1555nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1558 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001560 if (ctxt->nsTab[i] == prefix) {
1561 /* in scope */
1562 if (ctxt->nsTab[i + 1] == URL)
1563 return(-2);
1564 /* out of scope keep it */
1565 break;
1566 }
1567 }
1568 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570 ctxt->nsMax = 10;
1571 ctxt->nsNr = 0;
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001576 ctxt->nsMax = 0;
1577 return (-1);
1578 }
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001580 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001586 ctxt->nsMax /= 2;
1587 return (-1);
1588 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001589 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 }
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1594}
1595/**
1596 * nsPop:
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1599 *
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1601 *
1602 * Returns the number of namespaces removed
1603 */
1604static int
1605nsPop(xmlParserCtxtPtr ctxt, int nr)
1606{
1607 int i;
1608
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612 nr = ctxt->nsNr;
1613 }
1614 if (ctxt->nsNr <= 0)
1615 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001616
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 for (i = 0;i < nr;i++) {
1618 ctxt->nsNr--;
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1620 }
1621 return(nr);
1622}
1623#endif
1624
1625static int
1626xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001628 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629 int maxatts;
1630
1631 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001645 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001646 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001651 ctxt->maxatts = maxatts;
1652 }
1653 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657}
1658
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659/**
1660 * inputPush:
1661 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001662 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 *
1664 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001666 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001667 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001668int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001671 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001672 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 if (ctxt->inputNr >= ctxt->inputMax) {
1674 ctxt->inputMax *= 2;
1675 ctxt->inputTab =
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677 ctxt->inputMax *
1678 sizeof(ctxt->inputTab[0]));
1679 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
1683 value = NULL;
1684 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001685 }
1686 }
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1690}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001693 * @ctxt: an XML parser context
1694 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001696 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001699xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700inputPop(xmlParserCtxtPtr ctxt)
1701{
1702 xmlParserInputPtr ret;
1703
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001704 if (ctxt == NULL)
1705 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001707 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 ctxt->inputNr--;
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711 else
1712 ctxt->input = NULL;
1713 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001714 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 return (ret);
1716}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001717/**
1718 * nodePush:
1719 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001720 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 *
1722 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001723 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001724 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001729 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001731 xmlNodePtr *tmp;
1732
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001736 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001738 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001739 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001740 ctxt->nodeTab = tmp;
1741 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001742 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001747 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001750 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001751 ctxt->nodeTab[ctxt->nodeNr] = value;
1752 ctxt->node = value;
1753 return (ctxt->nodeNr++);
1754}
Daniel Veillard8915c152008-08-26 13:05:34 +00001755
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756/**
1757 * nodePop:
1758 * @ctxt: an XML parser context
1759 *
1760 * Pops the top element node from the node stack
1761 *
1762 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001763 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001764xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765nodePop(xmlParserCtxtPtr ctxt)
1766{
1767 xmlNodePtr ret;
1768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001771 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 ctxt->nodeNr--;
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775 else
1776 ctxt->node = NULL;
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 return (ret);
1780}
Daniel Veillarda2351322004-06-27 12:08:10 +00001781
1782#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 * nameNsPush:
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1789 *
1790 * Pushes a new element name/prefix/URL on top of the name stack
1791 *
1792 * Returns -1 in case of error, the index in the stack otherwise
1793 */
1794static int
1795nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797{
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1800 void **tmp2;
1801 ctxt->nameMax *= 2;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803 ctxt->nameMax *
1804 sizeof(ctxt->nameTab[0]));
1805 if (tmp == NULL) {
1806 ctxt->nameMax /= 2;
1807 goto mem_error;
1808 }
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811 ctxt->nameMax * 3 *
1812 sizeof(ctxt->pushTab[0]));
1813 if (tmp2 == NULL) {
1814 ctxt->nameMax /= 2;
1815 goto mem_error;
1816 }
1817 ctxt->pushTab = tmp2;
1818 }
1819 ctxt->nameTab[ctxt->nameNr] = value;
1820 ctxt->name = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 return (ctxt->nameNr++);
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 return (-1);
1828}
1829/**
1830 * nameNsPop:
1831 * @ctxt: an XML parser context
1832 *
1833 * Pops the top element/prefix/URI name from the name stack
1834 *
1835 * Returns the name just removed
1836 */
1837static const xmlChar *
1838nameNsPop(xmlParserCtxtPtr ctxt)
1839{
1840 const xmlChar *ret;
1841
1842 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001843 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844 ctxt->nameNr--;
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847 else
1848 ctxt->name = NULL;
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1851 return (ret);
1852}
Daniel Veillarda2351322004-06-27 12:08:10 +00001853#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854
1855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001856 * namePush:
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1859 *
1860 * Pushes a new element name on top of the name stack
1861 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001865namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001867 if (ctxt == NULL) return (-1);
1868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001872 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001873 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001878 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001879 }
1880 ctxt->nameTab[ctxt->nameNr] = value;
1881 ctxt->name = value;
1882 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001884 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886}
1887/**
1888 * namePop:
1889 * @ctxt: an XML parser context
1890 *
1891 * Pops the top element name from the name stack
1892 *
1893 * Returns the name just removed
1894 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001895const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896namePop(xmlParserCtxtPtr ctxt)
1897{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001898 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001899
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 ctxt->nameNr--;
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905 else
1906 ctxt->name = NULL;
1907 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001908 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 return (ret);
1910}
Owen Taylor3473f882001-02-23 17:55:21 +00001911
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001912static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001914 int *tmp;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001921 ctxt->spaceMax /=2;
1922 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001924 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1929}
1930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001931static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 int ret;
1933 if (ctxt->spaceNr <= 0) return(0);
1934 ctxt->spaceNr--;
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001938 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1941 return(ret);
1942}
1943
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1978
/*
 * Direct accessors on the current input; they all expect a variable named
 * ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1..cn.
 * The && chain stops at the first mismatch, so bytes after it are not
 * read. Arguments are parenthesized so that expressions such as p + 1
 * can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2001
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to the
 * column and to ctxt->nbChars without looking for newlines. val is
 * evaluated more than once. Afterwards a '%' triggers
 * xmlParserHandlePEReference() and an exhausted input is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so that
 * line and column stay accurate across newlines.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2024
Daniel Veillarda880b122003-04-21 21:36:41 +00002025#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002028 xmlSHRINK (ctxt);
2029
2030static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 }
Owen Taylor3473f882001-02-23 17:55:21 +00002036
Daniel Veillarda880b122003-04-21 21:36:41 +00002037#define GROW if ((ctxt->progressive == 0) && \
2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002039 xmlGROW (ctxt);
2040
2041static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002042 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2043 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2044
2045 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2046 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002047 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002048 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2049 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002050 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002051 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002052 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002053 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002054 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2055 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002056}
Owen Taylor3473f882001-02-23 17:55:21 +00002057
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar without any newline handling.
 * Expands to a braced block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long, keeping
 * line/column up to date; a '%' found afterwards triggers
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; a character of length 1 is stored directly, anything else goes
 * through xmlCopyCharMultiByte(). Expands to a bare if/else without a
 * trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) b[i++] = (xmlChar) (v);				\
    else i += xmlCopyCharMultiByte(&b[i],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002084
2085/**
2086 * xmlSkipBlankChars:
2087 * @ctxt: the XML parser context
2088 *
2089 * skip all blanks character found at that point in the input streams.
2090 * It pops up finished entities in the process if allowable at that point.
2091 *
2092 * Returns the number of space chars skipped
2093 */
2094
2095int
2096xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002097 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002098
2099 /*
2100 * It's Okay to use CUR/NEXT here since all the blanks are on
2101 * the ASCII range.
2102 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002103 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2104 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002105 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002106 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002107 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002108 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002109 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002110 if (*cur == '\n') {
2111 ctxt->input->line++; ctxt->input->col = 1;
2112 }
2113 cur++;
2114 res++;
2115 if (*cur == 0) {
2116 ctxt->input->cur = cur;
2117 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2118 cur = ctxt->input->cur;
2119 }
2120 }
2121 ctxt->input->cur = cur;
2122 } else {
2123 int cur;
2124 do {
2125 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002126 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002127 NEXT;
2128 cur = CUR;
2129 res++;
2130 }
2131 while ((cur == 0) && (ctxt->inputNr > 1) &&
2132 (ctxt->instate != XML_PARSER_COMMENT)) {
2133 xmlPopInput(ctxt);
2134 cur = CUR;
2135 }
2136 /*
2137 * Need to handle support of entities branching here
2138 */
2139 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2140 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2141 }
Owen Taylor3473f882001-02-23 17:55:21 +00002142 return(res);
2143}
2144
/************************************************************************
 *                                                                      *
 *              Commodity functions to handle entities                  *
 *                                                                      *
 ************************************************************************/
2150
2151/**
2152 * xmlPopInput:
2153 * @ctxt: an XML parser context
2154 *
2155 * xmlPopInput: the current input pointed by ctxt->input came to an end
2156 * pop it and return the next char.
2157 *
2158 * Returns the current xmlChar in the parser context
2159 */
2160xmlChar
2161xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002162 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002163 if (xmlParserDebugEntities)
2164 xmlGenericError(xmlGenericErrorContext,
2165 "Popping input %d\n", ctxt->inputNr);
2166 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002167 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002168 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2169 return(xmlPopInput(ctxt));
2170 return(CUR);
2171}
2172
2173/**
2174 * xmlPushInput:
2175 * @ctxt: an XML parser context
2176 * @input: an XML parser input fragment (entity, XML fragment ...).
2177 *
2178 * xmlPushInput: switch to a new input stream which is stacked on top
2179 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002180 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002181 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002182int
Owen Taylor3473f882001-02-23 17:55:21 +00002183xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002184 int ret;
2185 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002186
2187 if (xmlParserDebugEntities) {
2188 if ((ctxt->input != NULL) && (ctxt->input->filename))
2189 xmlGenericError(xmlGenericErrorContext,
2190 "%s(%d): ", ctxt->input->filename,
2191 ctxt->input->line);
2192 xmlGenericError(xmlGenericErrorContext,
2193 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2194 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002195 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002196 if (ctxt->instate == XML_PARSER_EOF)
2197 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002198 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002199 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002200}
2201
2202/**
2203 * xmlParseCharRef:
2204 * @ctxt: an XML parser context
2205 *
2206 * parse Reference declarations
2207 *
2208 * [66] CharRef ::= '&#' [0-9]+ ';' |
2209 * '&#x' [0-9a-fA-F]+ ';'
2210 *
2211 * [ WFC: Legal Character ]
2212 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002213 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002214 *
2215 * Returns the value parsed (as an int), 0 in case of error
2216 */
2217int
2218xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002219 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002220 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002221 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002222
Owen Taylor3473f882001-02-23 17:55:21 +00002223 /*
2224 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2225 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002226 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002227 (NXT(2) == 'x')) {
2228 SKIP(3);
2229 GROW;
2230 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002231 if (count++ > 20) {
2232 count = 0;
2233 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002234 if (ctxt->instate == XML_PARSER_EOF)
2235 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002236 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002237 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002238 val = val * 16 + (CUR - '0');
2239 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2240 val = val * 16 + (CUR - 'a') + 10;
2241 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2242 val = val * 16 + (CUR - 'A') + 10;
2243 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002244 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002245 val = 0;
2246 break;
2247 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002248 if (val > 0x10FFFF)
2249 outofrange = val;
2250
Owen Taylor3473f882001-02-23 17:55:21 +00002251 NEXT;
2252 count++;
2253 }
2254 if (RAW == ';') {
2255 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002256 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002257 ctxt->nbChars ++;
2258 ctxt->input->cur++;
2259 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002260 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002261 SKIP(2);
2262 GROW;
2263 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002264 if (count++ > 20) {
2265 count = 0;
2266 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002267 if (ctxt->instate == XML_PARSER_EOF)
2268 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002269 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002270 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002271 val = val * 10 + (CUR - '0');
2272 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002273 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002274 val = 0;
2275 break;
2276 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002277 if (val > 0x10FFFF)
2278 outofrange = val;
2279
Owen Taylor3473f882001-02-23 17:55:21 +00002280 NEXT;
2281 count++;
2282 }
2283 if (RAW == ';') {
2284 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002285 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002286 ctxt->nbChars ++;
2287 ctxt->input->cur++;
2288 }
2289 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002290 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 }
2292
2293 /*
2294 * [ WFC: Legal Character ]
2295 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002296 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002297 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002298 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002299 return(val);
2300 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002301 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2302 "xmlParseCharRef: invalid xmlChar value %d\n",
2303 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002304 }
2305 return(0);
2306}
2307
2308/**
2309 * xmlParseStringCharRef:
2310 * @ctxt: an XML parser context
2311 * @str: a pointer to an index in the string
2312 *
2313 * parse Reference declarations, variant parsing from a string rather
2314 * than an an input flow.
2315 *
2316 * [66] CharRef ::= '&#' [0-9]+ ';' |
2317 * '&#x' [0-9a-fA-F]+ ';'
2318 *
2319 * [ WFC: Legal Character ]
2320 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002321 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002322 *
2323 * Returns the value parsed (as an int), 0 in case of error, str will be
2324 * updated to the current value of the index
2325 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002326static int
Owen Taylor3473f882001-02-23 17:55:21 +00002327xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2328 const xmlChar *ptr;
2329 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002330 unsigned int val = 0;
2331 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002332
2333 if ((str == NULL) || (*str == NULL)) return(0);
2334 ptr = *str;
2335 cur = *ptr;
2336 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2337 ptr += 3;
2338 cur = *ptr;
2339 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002340 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002341 val = val * 16 + (cur - '0');
2342 else if ((cur >= 'a') && (cur <= 'f'))
2343 val = val * 16 + (cur - 'a') + 10;
2344 else if ((cur >= 'A') && (cur <= 'F'))
2345 val = val * 16 + (cur - 'A') + 10;
2346 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002347 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 val = 0;
2349 break;
2350 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002351 if (val > 0x10FFFF)
2352 outofrange = val;
2353
Owen Taylor3473f882001-02-23 17:55:21 +00002354 ptr++;
2355 cur = *ptr;
2356 }
2357 if (cur == ';')
2358 ptr++;
2359 } else if ((cur == '&') && (ptr[1] == '#')){
2360 ptr += 2;
2361 cur = *ptr;
2362 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002363 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002364 val = val * 10 + (cur - '0');
2365 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002366 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002367 val = 0;
2368 break;
2369 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002370 if (val > 0x10FFFF)
2371 outofrange = val;
2372
Owen Taylor3473f882001-02-23 17:55:21 +00002373 ptr++;
2374 cur = *ptr;
2375 }
2376 if (cur == ';')
2377 ptr++;
2378 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002379 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002380 return(0);
2381 }
2382 *str = ptr;
2383
2384 /*
2385 * [ WFC: Legal Character ]
2386 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002387 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002388 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002389 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002390 return(val);
2391 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002392 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2393 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2394 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002395 }
2396 return(0);
2397}
2398
2399/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002400 * xmlNewBlanksWrapperInputStream:
2401 * @ctxt: an XML parser context
2402 * @entity: an Entity pointer
2403 *
2404 * Create a new input stream for wrapping
2405 * blanks around a PEReference
2406 *
2407 * Returns the new input stream or NULL
2408 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002409
Daniel Veillardf5582f12002-06-11 10:08:16 +00002410static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002411
Daniel Veillardf4862f02002-09-10 11:13:43 +00002412static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002413xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2414 xmlParserInputPtr input;
2415 xmlChar *buffer;
2416 size_t length;
2417 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002418 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2419 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002420 return(NULL);
2421 }
2422 if (xmlParserDebugEntities)
2423 xmlGenericError(xmlGenericErrorContext,
2424 "new blanks wrapper for entity: %s\n", entity->name);
2425 input = xmlNewInputStream(ctxt);
2426 if (input == NULL) {
2427 return(NULL);
2428 }
2429 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002430 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002431 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002432 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002433 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002434 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002435 }
2436 buffer [0] = ' ';
2437 buffer [1] = '%';
2438 buffer [length-3] = ';';
2439 buffer [length-2] = ' ';
2440 buffer [length-1] = 0;
2441 memcpy(buffer + 2, entity->name, length - 5);
2442 input->free = deallocblankswrapper;
2443 input->base = buffer;
2444 input->cur = buffer;
2445 input->length = length;
2446 input->end = &buffer[length];
2447 return(input);
2448}
2449
2450/**
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * xmlParserHandlePEReference:
2452 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002453 *
Owen Taylor3473f882001-02-23 17:55:21 +00002454 * [69] PEReference ::= '%' Name ';'
2455 *
2456 * [ WFC: No Recursion ]
2457 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002458 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002459 *
2460 * [ WFC: Entity Declared ]
2461 * In a document without any DTD, a document with only an internal DTD
2462 * subset which contains no parameter entity references, or a document
2463 * with "standalone='yes'", ... ... The declaration of a parameter
2464 * entity must precede any reference to it...
2465 *
2466 * [ VC: Entity Declared ]
2467 * In a document with an external subset or external parameter entities
2468 * with "standalone='no'", ... ... The declaration of a parameter entity
2469 * must precede any reference to it...
2470 *
2471 * [ WFC: In DTD ]
2472 * Parameter-entity references may only appear in the DTD.
2473 * NOTE: misleading but this is handled.
2474 *
2475 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002476 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002477 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002478 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002479 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002480 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002481 */
2482void
2483xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002484 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002485 xmlEntityPtr entity = NULL;
2486 xmlParserInputPtr input;
2487
Owen Taylor3473f882001-02-23 17:55:21 +00002488 if (RAW != '%') return;
2489 switch(ctxt->instate) {
2490 case XML_PARSER_CDATA_SECTION:
2491 return;
2492 case XML_PARSER_COMMENT:
2493 return;
2494 case XML_PARSER_START_TAG:
2495 return;
2496 case XML_PARSER_END_TAG:
2497 return;
2498 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002499 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002500 return;
2501 case XML_PARSER_PROLOG:
2502 case XML_PARSER_START:
2503 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002504 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002505 return;
2506 case XML_PARSER_ENTITY_DECL:
2507 case XML_PARSER_CONTENT:
2508 case XML_PARSER_ATTRIBUTE_VALUE:
2509 case XML_PARSER_PI:
2510 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002511 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002512 /* we just ignore it there */
2513 return;
2514 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002515 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return;
2517 case XML_PARSER_ENTITY_VALUE:
2518 /*
2519 * NOTE: in the case of entity values, we don't do the
2520 * substitution here since we need the literal
2521 * entity value to be able to save the internal
2522 * subset of the document.
2523 * This will be handled by xmlStringDecodeEntities
2524 */
2525 return;
2526 case XML_PARSER_DTD:
2527 /*
2528 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2529 * In the internal DTD subset, parameter-entity references
2530 * can occur only where markup declarations can occur, not
2531 * within markup declarations.
2532 * In that case this is handled in xmlParseMarkupDecl
2533 */
2534 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2535 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002536 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002537 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002538 break;
2539 case XML_PARSER_IGNORE:
2540 return;
2541 }
2542
2543 NEXT;
2544 name = xmlParseName(ctxt);
2545 if (xmlParserDebugEntities)
2546 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002547 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002548 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002549 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002550 } else {
2551 if (RAW == ';') {
2552 NEXT;
2553 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2554 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002555 if (ctxt->instate == XML_PARSER_EOF)
2556 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002557 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002558
Owen Taylor3473f882001-02-23 17:55:21 +00002559 /*
2560 * [ WFC: Entity Declared ]
2561 * In a document without any DTD, a document with only an
2562 * internal DTD subset which contains no parameter entity
2563 * references, or a document with "standalone='yes'", ...
2564 * ... The declaration of a parameter entity must precede
2565 * any reference to it...
2566 */
2567 if ((ctxt->standalone == 1) ||
2568 ((ctxt->hasExternalSubset == 0) &&
2569 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002570 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002571 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002572 } else {
2573 /*
2574 * [ VC: Entity Declared ]
2575 * In a document with an external subset or external
2576 * parameter entities with "standalone='no'", ...
2577 * ... The declaration of a parameter entity must precede
2578 * any reference to it...
2579 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002580 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2581 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2582 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002583 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002584 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002585 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2586 "PEReference: %%%s; not found\n",
2587 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002588 ctxt->valid = 0;
2589 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002590 } else if (ctxt->input->free != deallocblankswrapper) {
2591 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002592 if (xmlPushInput(ctxt, input) < 0)
2593 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002594 } else {
2595 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2596 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002597 xmlChar start[4];
2598 xmlCharEncoding enc;
2599
Owen Taylor3473f882001-02-23 17:55:21 +00002600 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002601 * Note: external parameter entities will not be loaded, it
2602 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002603 * option of validating, or substituting entities were
2604 * given. Doing so is far more secure as the parser will
2605 * only process data coming from the document entity by
2606 * default.
2607 */
2608 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2609 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2610 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002611 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2612 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2613 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002614 (ctxt->validate == 0))
2615 return;
2616
2617 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002618 * handle the extra spaces added before and after
2619 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002620 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002621 */
2622 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002623 if (xmlPushInput(ctxt, input) < 0)
2624 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002625
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002626 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002627 * Get the 4 first bytes and decode the charset
2628 * if enc != XML_CHAR_ENCODING_NONE
2629 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002630 * Note that, since we may have some non-UTF8
2631 * encoding (like UTF16, bug 135229), the 'length'
2632 * is not known, but we can calculate based upon
2633 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002634 */
2635 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002636 if (ctxt->instate == XML_PARSER_EOF)
2637 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002638 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002639 start[0] = RAW;
2640 start[1] = NXT(1);
2641 start[2] = NXT(2);
2642 start[3] = NXT(3);
2643 enc = xmlDetectCharEncoding(start, 4);
2644 if (enc != XML_CHAR_ENCODING_NONE) {
2645 xmlSwitchEncoding(ctxt, enc);
2646 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002647 }
2648
Owen Taylor3473f882001-02-23 17:55:21 +00002649 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002650 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2651 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002652 xmlParseTextDecl(ctxt);
2653 }
Owen Taylor3473f882001-02-23 17:55:21 +00002654 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002655 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2656 "PEReference: %s is not a parameter entity\n",
2657 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002658 }
2659 }
2660 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002661 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002662 }
Owen Taylor3473f882001-02-23 17:55:21 +00002663 }
2664}
2665
/*
 * Macro used to grow the current buffer.
 * buffer: name of an xmlChar * variable holding a reallocatable block
 * buffer##_size: companion variable, expected to be a size_t, holding
 *                the current capacity; it is updated on success
 * n: number of bytes to add on top of doubling the capacity; it is
 *    parenthesized in the expansion so that any expression can be given
 * mem_error: label expected in the enclosing function to handle memory
 *            allocation failures; it is also jumped to when the new
 *            size would wrap around. In both cases buffer and
 *            buffer##_size are left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2680
2681/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002682 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002683 * @ctxt: the parser context
2684 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002685 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002686 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2687 * @end: an end marker xmlChar, 0 if none
2688 * @end2: an end marker xmlChar, 0 if none
2689 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002690 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002691 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002692 *
2693 * [67] Reference ::= EntityRef | CharRef
2694 *
2695 * [69] PEReference ::= '%' Name ';'
2696 *
2697 * Returns A newly allocated string with the substitution done. The caller
2698 * must deallocate it !
2699 */
2700xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002701xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2702 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002703 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002704 size_t buffer_size = 0;
2705 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002706
2707 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002708 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002709 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002710 xmlEntityPtr ent;
2711 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002712
Daniel Veillarda82b1822004-11-08 16:24:57 +00002713 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002714 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002715 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002716
Daniel Veillard0161e632008-08-28 15:36:32 +00002717 if (((ctxt->depth > 40) &&
2718 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2719 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002720 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002721 return(NULL);
2722 }
2723
2724 /*
2725 * allocate a translation buffer.
2726 */
2727 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002728 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002729 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002730
2731 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002732 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002733 * we are operating on already parsed values.
2734 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002735 if (str < last)
2736 c = CUR_SCHAR(str, l);
2737 else
2738 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002739 while ((c != 0) && (c != end) && /* non input consuming loop */
2740 (c != end2) && (c != end3)) {
2741
2742 if (c == 0) break;
2743 if ((c == '&') && (str[1] == '#')) {
2744 int val = xmlParseStringCharRef(ctxt, &str);
2745 if (val != 0) {
2746 COPY_BUF(0,buffer,nbchars,val);
2747 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002748 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002749 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002750 }
Owen Taylor3473f882001-02-23 17:55:21 +00002751 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2752 if (xmlParserDebugEntities)
2753 xmlGenericError(xmlGenericErrorContext,
2754 "String decoding Entity Reference: %.30s\n",
2755 str);
2756 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002757 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2758 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002759 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002760 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002761 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002762 if ((ent != NULL) &&
2763 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2764 if (ent->content != NULL) {
2765 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002766 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002767 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002768 }
Owen Taylor3473f882001-02-23 17:55:21 +00002769 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002770 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2771 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002772 }
2773 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002774 ctxt->depth++;
2775 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2776 0, 0, 0);
2777 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002778
Owen Taylor3473f882001-02-23 17:55:21 +00002779 if (rep != NULL) {
2780 current = rep;
2781 while (*current != 0) { /* non input consuming loop */
2782 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002783 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002784 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002785 goto int_error;
2786 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002787 }
2788 }
2789 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002790 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002791 }
2792 } else if (ent != NULL) {
2793 int i = xmlStrlen(ent->name);
2794 const xmlChar *cur = ent->name;
2795
2796 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002797 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002798 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002799 }
2800 for (;i > 0;i--)
2801 buffer[nbchars++] = *cur++;
2802 buffer[nbchars++] = ';';
2803 }
2804 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2805 if (xmlParserDebugEntities)
2806 xmlGenericError(xmlGenericErrorContext,
2807 "String decoding PE Reference: %.30s\n", str);
2808 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002809 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2810 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002811 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002812 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002813 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002814 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002815 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002816 }
Owen Taylor3473f882001-02-23 17:55:21 +00002817 ctxt->depth++;
2818 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2819 0, 0, 0);
2820 ctxt->depth--;
2821 if (rep != NULL) {
2822 current = rep;
2823 while (*current != 0) { /* non input consuming loop */
2824 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002825 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002826 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002827 goto int_error;
2828 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002829 }
2830 }
2831 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002832 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002833 }
2834 }
2835 } else {
2836 COPY_BUF(l,buffer,nbchars,c);
2837 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002838 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2839 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002840 }
2841 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002842 if (str < last)
2843 c = CUR_SCHAR(str, l);
2844 else
2845 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002846 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002847 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002848 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002849
2850mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002851 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002852int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002853 if (rep != NULL)
2854 xmlFree(rep);
2855 if (buffer != NULL)
2856 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002857 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002858}
2859
Daniel Veillarde57ec792003-09-10 10:50:59 +00002860/**
2861 * xmlStringDecodeEntities:
2862 * @ctxt: the parser context
2863 * @str: the input string
2864 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2865 * @end: an end marker xmlChar, 0 if none
2866 * @end2: an end marker xmlChar, 0 if none
2867 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002868 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002869 * Takes a entity string content and process to do the adequate substitutions.
2870 *
2871 * [67] Reference ::= EntityRef | CharRef
2872 *
2873 * [69] PEReference ::= '%' Name ';'
2874 *
2875 * Returns A newly allocated string with the substitution done. The caller
2876 * must deallocate it !
2877 */
2878xmlChar *
2879xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2880 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002881 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002882 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2883 end, end2, end3));
2884}
Owen Taylor3473f882001-02-23 17:55:21 +00002885
/************************************************************************
 *                                                                      *
 *              Commodity functions, cleanup needed ?                   *
 *                                                                      *
 ************************************************************************/
2891
2892/**
2893 * areBlanks:
2894 * @ctxt: an XML parser context
2895 * @str: a xmlChar *
2896 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002897 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002898 *
2899 * Is this a sequence of blank chars that one can ignore ?
2900 *
2901 * Returns 1 if ignorable 0 otherwise.
2902 */
2903
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002904static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2905 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002906 int i, ret;
2907 xmlNodePtr lastChild;
2908
Daniel Veillard05c13a22001-09-09 08:38:09 +00002909 /*
2910 * Don't spend time trying to differentiate them, the same callback is
2911 * used !
2912 */
2913 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002914 return(0);
2915
Owen Taylor3473f882001-02-23 17:55:21 +00002916 /*
2917 * Check for xml:space value.
2918 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002919 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2920 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002921 return(0);
2922
2923 /*
2924 * Check that the string is made of blanks
2925 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002926 if (blank_chars == 0) {
2927 for (i = 0;i < len;i++)
2928 if (!(IS_BLANK_CH(str[i]))) return(0);
2929 }
Owen Taylor3473f882001-02-23 17:55:21 +00002930
2931 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002932 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002933 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002934 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002935 if (ctxt->myDoc != NULL) {
2936 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2937 if (ret == 0) return(1);
2938 if (ret == 1) return(0);
2939 }
2940
2941 /*
2942 * Otherwise, heuristic :-\
2943 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002944 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002945 if ((ctxt->node->children == NULL) &&
2946 (RAW == '<') && (NXT(1) == '/')) return(0);
2947
2948 lastChild = xmlGetLastChild(ctxt->node);
2949 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002950 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2951 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002952 } else if (xmlNodeIsText(lastChild))
2953 return(0);
2954 else if ((ctxt->node->children != NULL) &&
2955 (xmlNodeIsText(ctxt->node->children)))
2956 return(0);
2957 return(1);
2958}
2959
/************************************************************************
 *                                                                      *
 *              Extra stuff for namespace support                       *
 *      Relates to http://www.w3.org/TR/WD-xml-names                    *
 *                                                                      *
 ************************************************************************/
2966
2967/**
2968 * xmlSplitQName:
2969 * @ctxt: an XML parser context
2970 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002971 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002972 *
2973 * parse an UTF8 encoded XML qualified name string
2974 *
2975 * [NS 5] QName ::= (Prefix ':')? LocalPart
2976 *
2977 * [NS 6] Prefix ::= NCName
2978 *
2979 * [NS 7] LocalPart ::= NCName
2980 *
2981 * Returns the local part, and prefix is updated
2982 * to get the Prefix if any.
2983 */
2984
2985xmlChar *
2986xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2987 xmlChar buf[XML_MAX_NAMELEN + 5];
2988 xmlChar *buffer = NULL;
2989 int len = 0;
2990 int max = XML_MAX_NAMELEN;
2991 xmlChar *ret = NULL;
2992 const xmlChar *cur = name;
2993 int c;
2994
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002995 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002996 *prefix = NULL;
2997
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002998 if (cur == NULL) return(NULL);
2999
Owen Taylor3473f882001-02-23 17:55:21 +00003000#ifndef XML_XML_NAMESPACE
3001 /* xml: prefix is not really a namespace */
3002 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3003 (cur[2] == 'l') && (cur[3] == ':'))
3004 return(xmlStrdup(name));
3005#endif
3006
Daniel Veillard597bc482003-07-24 16:08:28 +00003007 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00003008 if (cur[0] == ':')
3009 return(xmlStrdup(name));
3010
3011 c = *cur++;
3012 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3013 buf[len++] = c;
3014 c = *cur++;
3015 }
3016 if (len >= max) {
3017 /*
3018 * Okay someone managed to make a huge name, so he's ready to pay
3019 * for the processing speed.
3020 */
3021 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003022
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003023 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003024 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003025 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003026 return(NULL);
3027 }
3028 memcpy(buffer, buf, len);
3029 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3030 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003031 xmlChar *tmp;
3032
Owen Taylor3473f882001-02-23 17:55:21 +00003033 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003034 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003035 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003036 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003037 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003038 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003039 return(NULL);
3040 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003041 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003042 }
3043 buffer[len++] = c;
3044 c = *cur++;
3045 }
3046 buffer[len] = 0;
3047 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003048
Daniel Veillard597bc482003-07-24 16:08:28 +00003049 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003050 if (buffer != NULL)
3051 xmlFree(buffer);
3052 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003053 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003054 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003055
Owen Taylor3473f882001-02-23 17:55:21 +00003056 if (buffer == NULL)
3057 ret = xmlStrndup(buf, len);
3058 else {
3059 ret = buffer;
3060 buffer = NULL;
3061 max = XML_MAX_NAMELEN;
3062 }
3063
3064
3065 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003066 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003067 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003068 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003069 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003070 }
Owen Taylor3473f882001-02-23 17:55:21 +00003071 len = 0;
3072
Daniel Veillardbb284f42002-10-16 18:02:47 +00003073 /*
3074 * Check that the first character is proper to start
3075 * a new name
3076 */
3077 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3078 ((c >= 0x41) && (c <= 0x5A)) ||
3079 (c == '_') || (c == ':'))) {
3080 int l;
3081 int first = CUR_SCHAR(cur, l);
3082
3083 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003084 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003085 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003086 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003087 }
3088 }
3089 cur++;
3090
Owen Taylor3473f882001-02-23 17:55:21 +00003091 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3092 buf[len++] = c;
3093 c = *cur++;
3094 }
3095 if (len >= max) {
3096 /*
3097 * Okay someone managed to make a huge name, so he's ready to pay
3098 * for the processing speed.
3099 */
3100 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003101
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003102 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003103 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003104 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003105 return(NULL);
3106 }
3107 memcpy(buffer, buf, len);
3108 while (c != 0) { /* tested bigname2.xml */
3109 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003110 xmlChar *tmp;
3111
Owen Taylor3473f882001-02-23 17:55:21 +00003112 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003113 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003114 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003115 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003116 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003117 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003118 return(NULL);
3119 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003120 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003121 }
3122 buffer[len++] = c;
3123 c = *cur++;
3124 }
3125 buffer[len] = 0;
3126 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003127
Owen Taylor3473f882001-02-23 17:55:21 +00003128 if (buffer == NULL)
3129 ret = xmlStrndup(buf, len);
3130 else {
3131 ret = buffer;
3132 }
3133 }
3134
3135 return(ret);
3136}
3137
/************************************************************************
 *                                                                      *
 *                      The parser itself                               *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/

/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in DEBUG
 * builds. Objects with static storage duration start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3158
Daniel Veillard34e3f642008-07-29 09:02:27 +00003159/*
3160 * The two following functions are related to the change of accepted
3161 * characters for Name and NmToken in the Revision 5 of XML-1.0
3162 * They correspond to the modified production [4] and the new production [4a]
3163 * changes in that revision. Also note that the macros used for the
3164 * productions Letter, Digit, CombiningChar and Extender are not needed
3165 * anymore.
3166 * We still keep compatibility to pre-revision5 parsing semantic if the
3167 * new XML_PARSE_OLD10 option is given to the parser.
3168 */
3169static int
3170xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3171 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3172 /*
3173 * Use the new checks of production [4] [4a] amd [5] of the
3174 * Update 5 of XML-1.0
3175 */
3176 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3177 (((c >= 'a') && (c <= 'z')) ||
3178 ((c >= 'A') && (c <= 'Z')) ||
3179 (c == '_') || (c == ':') ||
3180 ((c >= 0xC0) && (c <= 0xD6)) ||
3181 ((c >= 0xD8) && (c <= 0xF6)) ||
3182 ((c >= 0xF8) && (c <= 0x2FF)) ||
3183 ((c >= 0x370) && (c <= 0x37D)) ||
3184 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3185 ((c >= 0x200C) && (c <= 0x200D)) ||
3186 ((c >= 0x2070) && (c <= 0x218F)) ||
3187 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3188 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3189 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3190 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3191 ((c >= 0x10000) && (c <= 0xEFFFF))))
3192 return(1);
3193 } else {
3194 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3195 return(1);
3196 }
3197 return(0);
3198}
3199
3200static int
3201xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3202 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3203 /*
3204 * Use the new checks of production [4] [4a] amd [5] of the
3205 * Update 5 of XML-1.0
3206 */
3207 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3208 (((c >= 'a') && (c <= 'z')) ||
3209 ((c >= 'A') && (c <= 'Z')) ||
3210 ((c >= '0') && (c <= '9')) || /* !start */
3211 (c == '_') || (c == ':') ||
3212 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3213 ((c >= 0xC0) && (c <= 0xD6)) ||
3214 ((c >= 0xD8) && (c <= 0xF6)) ||
3215 ((c >= 0xF8) && (c <= 0x2FF)) ||
3216 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3217 ((c >= 0x370) && (c <= 0x37D)) ||
3218 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3219 ((c >= 0x200C) && (c <= 0x200D)) ||
3220 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3221 ((c >= 0x2070) && (c <= 0x218F)) ||
3222 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3223 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3224 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3225 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3226 ((c >= 0x10000) && (c <= 0xEFFFF))))
3227 return(1);
3228 } else {
3229 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3230 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003231 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003232 (IS_COMBINING(c)) ||
3233 (IS_EXTENDER(c)))
3234 return(1);
3235 }
3236 return(0);
3237}
3238
Daniel Veillarde57ec792003-09-10 10:50:59 +00003239static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003240 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003241
Daniel Veillard34e3f642008-07-29 09:02:27 +00003242static const xmlChar *
3243xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3244 int len = 0, l;
3245 int c;
3246 int count = 0;
3247
Daniel Veillardc6561462009-03-25 10:22:31 +00003248#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003249 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003250#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003251
3252 /*
3253 * Handler for more complex cases
3254 */
3255 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003256 if (ctxt->instate == XML_PARSER_EOF)
3257 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003258 c = CUR_CHAR(l);
3259 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3260 /*
3261 * Use the new checks of production [4] [4a] amd [5] of the
3262 * Update 5 of XML-1.0
3263 */
3264 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3265 (!(((c >= 'a') && (c <= 'z')) ||
3266 ((c >= 'A') && (c <= 'Z')) ||
3267 (c == '_') || (c == ':') ||
3268 ((c >= 0xC0) && (c <= 0xD6)) ||
3269 ((c >= 0xD8) && (c <= 0xF6)) ||
3270 ((c >= 0xF8) && (c <= 0x2FF)) ||
3271 ((c >= 0x370) && (c <= 0x37D)) ||
3272 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3273 ((c >= 0x200C) && (c <= 0x200D)) ||
3274 ((c >= 0x2070) && (c <= 0x218F)) ||
3275 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3276 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3277 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3278 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3279 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3280 return(NULL);
3281 }
3282 len += l;
3283 NEXTL(l);
3284 c = CUR_CHAR(l);
3285 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3286 (((c >= 'a') && (c <= 'z')) ||
3287 ((c >= 'A') && (c <= 'Z')) ||
3288 ((c >= '0') && (c <= '9')) || /* !start */
3289 (c == '_') || (c == ':') ||
3290 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3291 ((c >= 0xC0) && (c <= 0xD6)) ||
3292 ((c >= 0xD8) && (c <= 0xF6)) ||
3293 ((c >= 0xF8) && (c <= 0x2FF)) ||
3294 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3295 ((c >= 0x370) && (c <= 0x37D)) ||
3296 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3297 ((c >= 0x200C) && (c <= 0x200D)) ||
3298 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3299 ((c >= 0x2070) && (c <= 0x218F)) ||
3300 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3301 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3302 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3303 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3304 ((c >= 0x10000) && (c <= 0xEFFFF))
3305 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003306 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003307 count = 0;
3308 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003309 if (ctxt->instate == XML_PARSER_EOF)
3310 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003311 }
3312 len += l;
3313 NEXTL(l);
3314 c = CUR_CHAR(l);
3315 }
3316 } else {
3317 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3318 (!IS_LETTER(c) && (c != '_') &&
3319 (c != ':'))) {
3320 return(NULL);
3321 }
3322 len += l;
3323 NEXTL(l);
3324 c = CUR_CHAR(l);
3325
3326 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3327 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3328 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003329 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003330 (IS_COMBINING(c)) ||
3331 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003332 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003333 count = 0;
3334 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003335 if (ctxt->instate == XML_PARSER_EOF)
3336 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003337 }
3338 len += l;
3339 NEXTL(l);
3340 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003341 if (c == 0) {
3342 count = 0;
3343 GROW;
3344 if (ctxt->instate == XML_PARSER_EOF)
3345 return(NULL);
3346 c = CUR_CHAR(l);
3347 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003348 }
3349 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003350 if ((len > XML_MAX_NAME_LENGTH) &&
3351 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3352 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3353 return(NULL);
3354 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003355 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3356 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3357 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3358}
3359
Owen Taylor3473f882001-02-23 17:55:21 +00003360/**
3361 * xmlParseName:
3362 * @ctxt: an XML parser context
3363 *
3364 * parse an XML name.
3365 *
3366 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3367 * CombiningChar | Extender
3368 *
3369 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3370 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003371 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003372 *
3373 * Returns the Name parsed or NULL
3374 */
3375
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003376const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003377xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003378 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003379 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003380 int count = 0;
3381
3382 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003383
Daniel Veillardc6561462009-03-25 10:22:31 +00003384#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003385 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003386#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003387
Daniel Veillard48b2f892001-02-25 16:11:03 +00003388 /*
3389 * Accelerator for simple ASCII names
3390 */
3391 in = ctxt->input->cur;
3392 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3393 ((*in >= 0x41) && (*in <= 0x5A)) ||
3394 (*in == '_') || (*in == ':')) {
3395 in++;
3396 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3397 ((*in >= 0x41) && (*in <= 0x5A)) ||
3398 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003399 (*in == '_') || (*in == '-') ||
3400 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003401 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003402 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003403 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003404 if ((count > XML_MAX_NAME_LENGTH) &&
3405 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3406 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3407 return(NULL);
3408 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003409 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003410 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003411 ctxt->nbChars += count;
3412 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003413 if (ret == NULL)
3414 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003415 return(ret);
3416 }
3417 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003418 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003419 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003420}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003421
Daniel Veillard34e3f642008-07-29 09:02:27 +00003422static const xmlChar *
3423xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3424 int len = 0, l;
3425 int c;
3426 int count = 0;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003427 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
Daniel Veillard34e3f642008-07-29 09:02:27 +00003428
Daniel Veillardc6561462009-03-25 10:22:31 +00003429#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003430 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003431#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432
3433 /*
3434 * Handler for more complex cases
3435 */
3436 GROW;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003437 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003438 c = CUR_CHAR(l);
3439 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3440 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3441 return(NULL);
3442 }
3443
3444 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3445 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003446 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003447 if ((len > XML_MAX_NAME_LENGTH) &&
3448 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3449 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3450 return(NULL);
3451 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003452 count = 0;
3453 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003454 if (ctxt->instate == XML_PARSER_EOF)
3455 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003456 }
3457 len += l;
3458 NEXTL(l);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003459 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003460 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003461 if (c == 0) {
3462 count = 0;
3463 GROW;
3464 if (ctxt->instate == XML_PARSER_EOF)
3465 return(NULL);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003466 end = ctxt->input->cur;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003467 c = CUR_CHAR(l);
3468 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003469 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003470 if ((len > XML_MAX_NAME_LENGTH) &&
3471 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3472 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3473 return(NULL);
3474 }
Daniel Veillarddcc19502013-05-22 22:56:45 +02003475 return(xmlDictLookup(ctxt->dict, end - len, len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003476}
3477
3478/**
3479 * xmlParseNCName:
3480 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003481 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003482 *
3483 * parse an XML name.
3484 *
3485 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3486 * CombiningChar | Extender
3487 *
3488 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3489 *
3490 * Returns the Name parsed or NULL
3491 */
3492
3493static const xmlChar *
3494xmlParseNCName(xmlParserCtxtPtr ctxt) {
3495 const xmlChar *in;
3496 const xmlChar *ret;
3497 int count = 0;
3498
Daniel Veillardc6561462009-03-25 10:22:31 +00003499#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003500 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003501#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003502
3503 /*
3504 * Accelerator for simple ASCII names
3505 */
3506 in = ctxt->input->cur;
3507 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3508 ((*in >= 0x41) && (*in <= 0x5A)) ||
3509 (*in == '_')) {
3510 in++;
3511 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3512 ((*in >= 0x41) && (*in <= 0x5A)) ||
3513 ((*in >= 0x30) && (*in <= 0x39)) ||
3514 (*in == '_') || (*in == '-') ||
3515 (*in == '.'))
3516 in++;
3517 if ((*in > 0) && (*in < 0x80)) {
3518 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003519 if ((count > XML_MAX_NAME_LENGTH) &&
3520 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3521 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522 return(NULL);
3523 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003524 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525 ctxt->input->cur = in;
3526 ctxt->nbChars += count;
3527 ctxt->input->col += count;
3528 if (ret == NULL) {
3529 xmlErrMemory(ctxt, NULL);
3530 }
3531 return(ret);
3532 }
3533 }
3534 return(xmlParseNCNameComplex(ctxt));
3535}
3536
Daniel Veillard46de64e2002-05-29 08:21:33 +00003537/**
3538 * xmlParseNameAndCompare:
3539 * @ctxt: an XML parser context
3540 *
3541 * parse an XML name and compares for match
3542 * (specialized for endtag parsing)
3543 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003544 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545 * and the name for mismatch
3546 */
3547
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003548static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003549xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003550 register const xmlChar *cmp = other;
3551 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003552 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003553
3554 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003555 if (ctxt->instate == XML_PARSER_EOF)
3556 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003557
Daniel Veillard46de64e2002-05-29 08:21:33 +00003558 in = ctxt->input->cur;
3559 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003560 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003561 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003562 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003563 }
William M. Brack76e95df2003-10-18 16:20:14 +00003564 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003565 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003566 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003567 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003568 }
3569 /* failure (or end of input buffer), check with full function */
3570 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003571 /* strings coming from the dictionnary direct compare possible */
3572 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003573 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003574 }
3575 return ret;
3576}
3577
Owen Taylor3473f882001-02-23 17:55:21 +00003578/**
3579 * xmlParseStringName:
3580 * @ctxt: an XML parser context
3581 * @str: a pointer to the string pointer (IN/OUT)
3582 *
3583 * parse an XML name.
3584 *
3585 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586 * CombiningChar | Extender
3587 *
3588 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003590 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003591 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003592 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003593 * is updated to the current location in the string.
3594 */
3595
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003596static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003597xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598 xmlChar buf[XML_MAX_NAMELEN + 5];
3599 const xmlChar *cur = *str;
3600 int len = 0, l;
3601 int c;
3602
Daniel Veillardc6561462009-03-25 10:22:31 +00003603#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003604 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003605#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003606
Owen Taylor3473f882001-02-23 17:55:21 +00003607 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003608 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003609 return(NULL);
3610 }
3611
Daniel Veillard34e3f642008-07-29 09:02:27 +00003612 COPY_BUF(l,buf,len,c);
3613 cur += l;
3614 c = CUR_SCHAR(cur, l);
3615 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003616 COPY_BUF(l,buf,len,c);
3617 cur += l;
3618 c = CUR_SCHAR(cur, l);
3619 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3620 /*
3621 * Okay someone managed to make a huge name, so he's ready to pay
3622 * for the processing speed.
3623 */
3624 xmlChar *buffer;
3625 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003626
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003627 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003628 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003629 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003630 return(NULL);
3631 }
3632 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003633 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003634 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003635 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003636
3637 if ((len > XML_MAX_NAME_LENGTH) &&
3638 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3639 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3640 xmlFree(buffer);
3641 return(NULL);
3642 }
Owen Taylor3473f882001-02-23 17:55:21 +00003643 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003644 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003645 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003646 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003647 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003648 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003649 return(NULL);
3650 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003651 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003652 }
3653 COPY_BUF(l,buffer,len,c);
3654 cur += l;
3655 c = CUR_SCHAR(cur, l);
3656 }
3657 buffer[len] = 0;
3658 *str = cur;
3659 return(buffer);
3660 }
3661 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003662 if ((len > XML_MAX_NAME_LENGTH) &&
3663 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3664 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665 return(NULL);
3666 }
Owen Taylor3473f882001-02-23 17:55:21 +00003667 *str = cur;
3668 return(xmlStrndup(buf, len));
3669}
3670
3671/**
3672 * xmlParseNmtoken:
3673 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003674 *
Owen Taylor3473f882001-02-23 17:55:21 +00003675 * parse an XML Nmtoken.
3676 *
3677 * [7] Nmtoken ::= (NameChar)+
3678 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003679 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003680 *
3681 * Returns the Nmtoken parsed or NULL
3682 */
3683
3684xmlChar *
3685xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3686 xmlChar buf[XML_MAX_NAMELEN + 5];
3687 int len = 0, l;
3688 int c;
3689 int count = 0;
3690
Daniel Veillardc6561462009-03-25 10:22:31 +00003691#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003692 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003693#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003694
Owen Taylor3473f882001-02-23 17:55:21 +00003695 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003696 if (ctxt->instate == XML_PARSER_EOF)
3697 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003698 c = CUR_CHAR(l);
3699
Daniel Veillard34e3f642008-07-29 09:02:27 +00003700 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003701 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003702 count = 0;
3703 GROW;
3704 }
3705 COPY_BUF(l,buf,len,c);
3706 NEXTL(l);
3707 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003708 if (c == 0) {
3709 count = 0;
3710 GROW;
3711 if (ctxt->instate == XML_PARSER_EOF)
3712 return(NULL);
3713 c = CUR_CHAR(l);
3714 }
Owen Taylor3473f882001-02-23 17:55:21 +00003715 if (len >= XML_MAX_NAMELEN) {
3716 /*
3717 * Okay someone managed to make a huge token, so he's ready to pay
3718 * for the processing speed.
3719 */
3720 xmlChar *buffer;
3721 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003722
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003723 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003724 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003725 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003726 return(NULL);
3727 }
3728 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003729 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003730 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003731 count = 0;
3732 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003733 if (ctxt->instate == XML_PARSER_EOF) {
3734 xmlFree(buffer);
3735 return(NULL);
3736 }
Owen Taylor3473f882001-02-23 17:55:21 +00003737 }
3738 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003739 xmlChar *tmp;
3740
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003741 if ((max > XML_MAX_NAME_LENGTH) &&
3742 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3743 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744 xmlFree(buffer);
3745 return(NULL);
3746 }
Owen Taylor3473f882001-02-23 17:55:21 +00003747 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003748 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003749 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003750 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003751 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003752 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003753 return(NULL);
3754 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003755 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003756 }
3757 COPY_BUF(l,buffer,len,c);
3758 NEXTL(l);
3759 c = CUR_CHAR(l);
3760 }
3761 buffer[len] = 0;
3762 return(buffer);
3763 }
3764 }
3765 if (len == 0)
3766 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003767 if ((len > XML_MAX_NAME_LENGTH) &&
3768 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3769 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3770 return(NULL);
3771 }
Owen Taylor3473f882001-02-23 17:55:21 +00003772 return(xmlStrndup(buf, len));
3773}
3774
3775/**
3776 * xmlParseEntityValue:
3777 * @ctxt: an XML parser context
3778 * @orig: if non-NULL store a copy of the original entity value
3779 *
3780 * parse a value for ENTITY declarations
3781 *
3782 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3783 * "'" ([^%&'] | PEReference | Reference)* "'"
3784 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003785 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003786 */
3787
3788xmlChar *
3789xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3790 xmlChar *buf = NULL;
3791 int len = 0;
3792 int size = XML_PARSER_BUFFER_SIZE;
3793 int c, l;
3794 xmlChar stop;
3795 xmlChar *ret = NULL;
3796 const xmlChar *cur = NULL;
3797 xmlParserInputPtr input;
3798
3799 if (RAW == '"') stop = '"';
3800 else if (RAW == '\'') stop = '\'';
3801 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003802 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003803 return(NULL);
3804 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003805 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003806 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003807 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003808 return(NULL);
3809 }
3810
3811 /*
3812 * The content of the entity definition is copied in a buffer.
3813 */
3814
3815 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3816 input = ctxt->input;
3817 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003818 if (ctxt->instate == XML_PARSER_EOF) {
3819 xmlFree(buf);
3820 return(NULL);
3821 }
Owen Taylor3473f882001-02-23 17:55:21 +00003822 NEXT;
3823 c = CUR_CHAR(l);
3824 /*
3825 * NOTE: 4.4.5 Included in Literal
3826 * When a parameter entity reference appears in a literal entity
3827 * value, ... a single or double quote character in the replacement
3828 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003829 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003830 * In practice it means we stop the loop only when back at parsing
3831 * the initial entity and the quote is found
3832 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003833 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3834 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003835 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003836 xmlChar *tmp;
3837
Owen Taylor3473f882001-02-23 17:55:21 +00003838 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003839 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3840 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003841 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003842 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003843 return(NULL);
3844 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003845 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003846 }
3847 COPY_BUF(l,buf,len,c);
3848 NEXTL(l);
3849 /*
3850 * Pop-up of finished entities.
3851 */
3852 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3853 xmlPopInput(ctxt);
3854
3855 GROW;
3856 c = CUR_CHAR(l);
3857 if (c == 0) {
3858 GROW;
3859 c = CUR_CHAR(l);
3860 }
3861 }
3862 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003863 if (ctxt->instate == XML_PARSER_EOF) {
3864 xmlFree(buf);
3865 return(NULL);
3866 }
Owen Taylor3473f882001-02-23 17:55:21 +00003867
3868 /*
3869 * Raise problem w.r.t. '&' and '%' being used in non-entities
3870 * reference constructs. Note Charref will be handled in
3871 * xmlStringDecodeEntities()
3872 */
3873 cur = buf;
3874 while (*cur != 0) { /* non input consuming */
3875 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3876 xmlChar *name;
3877 xmlChar tmp = *cur;
3878
3879 cur++;
3880 name = xmlParseStringName(ctxt, &cur);
3881 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003882 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003883 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003884 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003885 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003886 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3887 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003888 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003889 }
3890 if (name != NULL)
3891 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003892 if (*cur == 0)
3893 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003894 }
3895 cur++;
3896 }
3897
3898 /*
3899 * Then PEReference entities are substituted.
3900 */
3901 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003902 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003903 xmlFree(buf);
3904 } else {
3905 NEXT;
3906 /*
3907 * NOTE: 4.4.7 Bypassed
3908 * When a general entity reference appears in the EntityValue in
3909 * an entity declaration, it is bypassed and left as is.
3910 * so XML_SUBSTITUTE_REF is not set here.
3911 */
3912 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3913 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003914 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003915 *orig = buf;
3916 else
3917 xmlFree(buf);
3918 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003919
Owen Taylor3473f882001-02-23 17:55:21 +00003920 return(ret);
3921}
3922
3923/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003924 * xmlParseAttValueComplex:
3925 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003926 * @attlen: if non-NULL, receives the length of the returned value
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003927 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003928 *
3929 * parse a value for an attribute, this is the fallback function
3930 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003931 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003932 *
3933 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3934 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003935static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003936xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003937 xmlChar limit = 0;
3938 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003939 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003940 size_t len = 0;
3941 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003942 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003943 xmlChar *current = NULL;
3944 xmlEntityPtr ent;
3945
    /*
     * The value must open with a quote; that quote character becomes the
     * terminator (limit) the main loop looks for.
     */
Owen Taylor3473f882001-02-23 17:55:21 +00003946 if (NXT(0) == '"') {
3947 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3948 limit = '"';
3949 NEXT;
3950 } else if (NXT(0) == '\'') {
3951 limit = '\'';
3952 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3953 NEXT;
3954 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003955 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003956 return(NULL);
3957 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003958
Owen Taylor3473f882001-02-23 17:55:21 +00003959 /*
3960 * allocate a translation buffer.
3961 */
3962 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003963 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003964 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003965
3966 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003967 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003968 */
    /*
     * NOTE(review): growBuffer() is a macro defined outside this block; it
     * is always applied to `buf` here and a `mem_error` label is in scope,
     * so it presumably relies on the names buf / buf_size / mem_error --
     * confirm before renaming any of them.
     */
3969 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003970 while (((NXT(0) != limit) && /* checked */
3971 (IS_CHAR(c)) && (c != '<')) &&
3972 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003973 /*
3974 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3975 * special option is given
3976 */
3977 if ((len > XML_MAX_TEXT_LENGTH) &&
3978 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3979 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003980 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003981 goto mem_error;
3982 }
Owen Taylor3473f882001-02-23 17:55:21 +00003983 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003984 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003985 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003986 if (NXT(1) == '#') {
        /* "&#...": character reference. */
3987 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003988
Owen Taylor3473f882001-02-23 17:55:21 +00003989 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003990 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003991 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003992 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003993 }
3994 buf[len++] = '&';
3995 } else {
3996 /*
3997 * The reparsing will be done in xmlStringGetNodeList()
3998 * called by the attribute() function in SAX.c
3999 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004000 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004001 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004002 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004003 buf[len++] = '&';
4004 buf[len++] = '#';
4005 buf[len++] = '3';
4006 buf[len++] = '8';
4007 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004008 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004009 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004010 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004011 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004012 }
Owen Taylor3473f882001-02-23 17:55:21 +00004013 len += xmlCopyChar(0, &buf[len], val);
4014 }
4015 } else {
        /* "&name;": general entity reference. */
4016 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004017 ctxt->nbentities++;
4018 if (ent != NULL)
4019 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004020 if ((ent != NULL) &&
4021 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004022 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004023 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004024 }
4025 if ((ctxt->replaceEntities == 0) &&
4026 (ent->content[0] == '&')) {
4027 buf[len++] = '&';
4028 buf[len++] = '#';
4029 buf[len++] = '3';
4030 buf[len++] = '8';
4031 buf[len++] = ';';
4032 } else {
4033 buf[len++] = ent->content[0];
4034 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004035 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004036 (ctxt->replaceEntities != 0)) {
        /*
         * Predefined entities were handled by the previous branch, so
         * the etype test below is always true here and its else branch
         * cannot be reached.
         */
Owen Taylor3473f882001-02-23 17:55:21 +00004037 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4038 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004039 XML_SUBSTITUTE_REF,
4040 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004041 if (rep != NULL) {
        /*
         * Copy the replacement text, mapping CR, LF and TAB to a
         * space.  Each byte is stored first and the capacity is
         * re-checked afterwards.
         */
4042 current = rep;
4043 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004044 if ((*current == 0xD) || (*current == 0xA) ||
4045 (*current == 0x9)) {
4046 buf[len++] = 0x20;
4047 current++;
4048 } else
4049 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004050 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004051 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004052 }
4053 }
4054 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004055 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004056 }
4057 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004058 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004059 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004060 }
Owen Taylor3473f882001-02-23 17:55:21 +00004061 if (ent->content != NULL)
4062 buf[len++] = ent->content[0];
4063 }
4064 } else if (ent != NULL) {
4065 int i = xmlStrlen(ent->name);
4066 const xmlChar *cur = ent->name;
4067
4068 /*
4069 * This may look absurd but is needed to detect
4070 * entities problems
4071 */
4072 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004073 (ent->content != NULL) && (ent->checked == 0)) {
4074 unsigned long oldnbent = ctxt->nbentities;
4075
Owen Taylor3473f882001-02-23 17:55:21 +00004076 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004077 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004078
        /*
         * ent->checked stores twice the entity count of the expansion
         * (plus one); the low bit is set below when the expansion
         * contains a '<'.
         */
Daniel Veillardcff25462013-03-11 15:57:55 +08004079 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004080 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004081 if (xmlStrchr(rep, '<'))
4082 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004083 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004084 rep = NULL;
4085 }
Owen Taylor3473f882001-02-23 17:55:21 +00004086 }
4087
4088 /*
4089 * Just output the reference
4090 */
        /*
         * The '&' is stored before the capacity check that follows, so
         * this relies on the previous step having left at least one
         * free byte in buf.
         */
4091 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004092 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004093 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004094 }
4095 for (;i > 0;i--)
4096 buf[len++] = *cur++;
4097 buf[len++] = ';';
4098 }
4099 }
4100 } else {
        /*
         * Plain character.  Whitespace is stored as a single 0x20; when
         * normalizing, leading whitespace and repeated whitespace are
         * dropped.
         */
4101 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004102 if ((len != 0) || (!normalize)) {
4103 if ((!normalize) || (!in_space)) {
4104 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004105 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004106 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004107 }
4108 }
4109 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004110 }
4111 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004112 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004113 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004114 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004115 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004116 }
4117 }
4118 NEXTL(l);
4119 }
4120 GROW;
4121 c = CUR_CHAR(l);
4122 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004123 if (ctxt->instate == XML_PARSER_EOF)
4124 goto error;
4125
    /* When normalizing, strip the trailing spaces left by the loop. */
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004126 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004127 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004128 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004129 buf[len] = 0;
    /*
     * Report why the loop stopped.  None of the three error cases below
     * discards buf: the value collected so far is still returned.
     */
Owen Taylor3473f882001-02-23 17:55:21 +00004130 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004131 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004132 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004133 if ((c != 0) && (!IS_CHAR(c))) {
4134 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4135 "invalid character in attribute value\n");
4136 } else {
4137 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4138 "AttValue: ' expected\n");
4139 }
Owen Taylor3473f882001-02-23 17:55:21 +00004140 } else
4141 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004142
4143 /*
4144 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004145 * length more than INT_MAX it is a very reasonable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004146 */
4147 if (len >= INT_MAX) {
4148 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004149 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004150 goto mem_error;
4151 }
4152
4153 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004154 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004155
    /*
     * mem_error is also the target of the two "AttValue length too long"
     * checks above, so xmlErrMemory() is called for those as well; the
     * error label is the entry point that skips it.
     */
4156mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004157 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004158error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004159 if (buf != NULL)
4160 xmlFree(buf);
4161 if (rep != NULL)
4162 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004163 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004164}
4165
4166/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004167 * xmlParseAttValue:
4168 * @ctxt: an XML parser context
4169 *
4170 * parse a value for an attribute
4171 * Note: the parser won't do substitution of entities here, this
4172 * will be handled later in xmlStringGetNodeList
4173 *
4174 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4175 * "'" ([^<&'] | Reference)* "'"
4176 *
4177 * 3.3.3 Attribute-Value Normalization:
4178 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004179 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004180 * - a character reference is processed by appending the referenced
4181 * character to the attribute value
4182 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004183 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004184 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4185 * appending #x20 to the normalized value, except that only a single
4186 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004187 * parsed entity or the literal entity value of an internal parsed entity
4188 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004189 * If the declared value is not CDATA, then the XML processor must further
4190 * process the normalized attribute value by discarding any leading and
4191 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004192 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004193 * All attributes for which no declaration has been read should be treated
4194 * by a non-validating parser as if declared CDATA.
4195 *
4196 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4197 */
4198
4199
4200xmlChar *
4201xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004202 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004203 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004204}
4205
4206/**
Owen Taylor3473f882001-02-23 17:55:21 +00004207 * xmlParseSystemLiteral:
4208 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004209 *
Owen Taylor3473f882001-02-23 17:55:21 +00004210 * parse an XML Literal
4211 *
4212 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4213 *
4214 * Returns the SystemLiteral parsed or NULL
4215 */
4216
4217xmlChar *
4218xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4219 xmlChar *buf = NULL;
4220 int len = 0;
4221 int size = XML_PARSER_BUFFER_SIZE;
4222 int cur, l;
4223 xmlChar stop;
4224 int state = ctxt->instate;
4225 int count = 0;
4226
4227 SHRINK;
4228 if (RAW == '"') {
4229 NEXT;
4230 stop = '"';
4231 } else if (RAW == '\'') {
4232 NEXT;
4233 stop = '\'';
4234 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004235 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004236 return(NULL);
4237 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004238
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004239 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004240 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004241 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004242 return(NULL);
4243 }
4244 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4245 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004246 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004247 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004248 xmlChar *tmp;
4249
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004250 if ((size > XML_MAX_NAME_LENGTH) &&
4251 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4252 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4253 xmlFree(buf);
4254 ctxt->instate = (xmlParserInputState) state;
4255 return(NULL);
4256 }
Owen Taylor3473f882001-02-23 17:55:21 +00004257 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004258 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4259 if (tmp == NULL) {
4260 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004261 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004262 ctxt->instate = (xmlParserInputState) state;
4263 return(NULL);
4264 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004265 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004266 }
4267 count++;
4268 if (count > 50) {
4269 GROW;
4270 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004271 if (ctxt->instate == XML_PARSER_EOF) {
4272 xmlFree(buf);
4273 return(NULL);
4274 }
Owen Taylor3473f882001-02-23 17:55:21 +00004275 }
4276 COPY_BUF(l,buf,len,cur);
4277 NEXTL(l);
4278 cur = CUR_CHAR(l);
4279 if (cur == 0) {
4280 GROW;
4281 SHRINK;
4282 cur = CUR_CHAR(l);
4283 }
4284 }
4285 buf[len] = 0;
4286 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004287 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004288 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004289 } else {
4290 NEXT;
4291 }
4292 return(buf);
4293}
4294
4295/**
4296 * xmlParsePubidLiteral:
4297 * @ctxt: an XML parser context
4298 *
4299 * parse an XML public literal
4300 *
4301 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4302 *
4303 * Returns the PubidLiteral parsed or NULL.
4304 */
4305
4306xmlChar *
4307xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4308 xmlChar *buf = NULL;
4309 int len = 0;
4310 int size = XML_PARSER_BUFFER_SIZE;
4311 xmlChar cur;
4312 xmlChar stop;
4313 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004314 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004315
4316 SHRINK;
4317 if (RAW == '"') {
4318 NEXT;
4319 stop = '"';
4320 } else if (RAW == '\'') {
4321 NEXT;
4322 stop = '\'';
4323 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004324 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 return(NULL);
4326 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004327 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004328 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004329 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004330 return(NULL);
4331 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004332 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004333 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004334 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004335 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004336 xmlChar *tmp;
4337
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004338 if ((size > XML_MAX_NAME_LENGTH) &&
4339 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4340 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4341 xmlFree(buf);
4342 return(NULL);
4343 }
Owen Taylor3473f882001-02-23 17:55:21 +00004344 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004345 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4346 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004347 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004348 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004349 return(NULL);
4350 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004351 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004352 }
4353 buf[len++] = cur;
4354 count++;
4355 if (count > 50) {
4356 GROW;
4357 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004358 if (ctxt->instate == XML_PARSER_EOF) {
4359 xmlFree(buf);
4360 return(NULL);
4361 }
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 NEXT;
4364 cur = CUR;
4365 if (cur == 0) {
4366 GROW;
4367 SHRINK;
4368 cur = CUR;
4369 }
4370 }
4371 buf[len] = 0;
4372 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004373 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004374 } else {
4375 NEXT;
4376 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004377 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004378 return(buf);
4379}
4380
/* Forward declaration; the definition is not part of this section. */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004381static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004382
4383/*
4384 * used for the test in the inner loop of the char data testing
 *
 * 256-entry lookup table indexed by byte value.  An entry holds the byte
 * itself for 0x09 and for 0x20..0x7F, except for '&' (0x26), '<' (0x3C)
 * and ']' (0x5D); every other entry (the remaining control characters,
 * including CR and LF, and all bytes >= 0x80) is 0x00.
 * NOTE(review): presumably a zero entry marks a byte the fast char data
 * loop cannot consume directly -- confirm against xmlParseCharData().
4385 */
4386static const unsigned char test_char_data[256] = {
4387 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4388 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4389 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4390 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4391 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4392 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4393 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4394 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4395 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4396 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4397 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4398 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4399 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4400 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4401 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4402 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4403 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4404 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4405 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4406 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4407 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4408 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4409 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4410 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4411 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4412 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4413 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4414 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4415 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4416 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4417 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4418 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4419};
4420
Owen Taylor3473f882001-02-23 17:55:21 +00004421/**
4422 * xmlParseCharData:
4423 * @ctxt: an XML parser context
4424 * @cdata: int indicating whether we are within a CDATA section
4425 *
4426 * parse a CharData section.
4427 * if we are within a CDATA section ']]>' marks an end of section.
4428 *
4429 * The right angle bracket (>) may be represented using the string "&gt;",
4430 * and must, for compatibility, be escaped using "&gt;" or a character
4431 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004432 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004433 *
4434 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4435 */
4436
4437void
4438xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004439 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004440 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004441 int line = ctxt->input->line;
4442 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004443 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004444
4445 SHRINK;
4446 GROW;
4447 /*
4448 * Accelerated common case where input don't need to be
4449 * modified before passing it to the handler.
4450 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004451 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004452 in = ctxt->input->cur;
4453 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004454get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004455 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004456 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004457 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004458 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004459 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004460 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004461 goto get_more_space;
4462 }
4463 if (*in == '<') {
4464 nbchar = in - ctxt->input->cur;
4465 if (nbchar > 0) {
4466 const xmlChar *tmp = ctxt->input->cur;
4467 ctxt->input->cur = in;
4468
Daniel Veillard34099b42004-11-04 17:34:35 +00004469 if ((ctxt->sax != NULL) &&
4470 (ctxt->sax->ignorableWhitespace !=
4471 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004472 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004473 if (ctxt->sax->ignorableWhitespace != NULL)
4474 ctxt->sax->ignorableWhitespace(ctxt->userData,
4475 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004476 } else {
4477 if (ctxt->sax->characters != NULL)
4478 ctxt->sax->characters(ctxt->userData,
4479 tmp, nbchar);
4480 if (*ctxt->space == -1)
4481 *ctxt->space = -2;
4482 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004483 } else if ((ctxt->sax != NULL) &&
4484 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004485 ctxt->sax->characters(ctxt->userData,
4486 tmp, nbchar);
4487 }
4488 }
4489 return;
4490 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004491
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004492get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004493 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004494 while (test_char_data[*in]) {
4495 in++;
4496 ccol++;
4497 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004498 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004499 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004500 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004501 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004502 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004503 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004504 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004505 }
4506 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004507 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004508 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004509 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004510 return;
4511 }
4512 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004513 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004514 goto get_more;
4515 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004516 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004517 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004518 if ((ctxt->sax != NULL) &&
4519 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004520 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004521 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004522 const xmlChar *tmp = ctxt->input->cur;
4523 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004524
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004525 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004526 if (ctxt->sax->ignorableWhitespace != NULL)
4527 ctxt->sax->ignorableWhitespace(ctxt->userData,
4528 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004529 } else {
4530 if (ctxt->sax->characters != NULL)
4531 ctxt->sax->characters(ctxt->userData,
4532 tmp, nbchar);
4533 if (*ctxt->space == -1)
4534 *ctxt->space = -2;
4535 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004536 line = ctxt->input->line;
4537 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004538 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004539 if (ctxt->sax->characters != NULL)
4540 ctxt->sax->characters(ctxt->userData,
4541 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004542 line = ctxt->input->line;
4543 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004544 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004545 /* something really bad happened in the SAX callback */
4546 if (ctxt->instate != XML_PARSER_CONTENT)
4547 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004548 }
4549 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004550 if (*in == 0xD) {
4551 in++;
4552 if (*in == 0xA) {
4553 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004554 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004555 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004556 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004557 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004558 in--;
4559 }
4560 if (*in == '<') {
4561 return;
4562 }
4563 if (*in == '&') {
4564 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004565 }
4566 SHRINK;
4567 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004568 if (ctxt->instate == XML_PARSER_EOF)
4569 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004570 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004571 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004572 nbchar = 0;
4573 }
Daniel Veillard50582112001-03-26 22:52:16 +00004574 ctxt->input->line = line;
4575 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004576 xmlParseCharDataComplex(ctxt, cdata);
4577}
4578
Daniel Veillard01c13b52002-12-10 15:19:08 +00004579/**
4580 * xmlParseCharDataComplex:
4581 * @ctxt: an XML parser context
4582 * @cdata: int indicating whether we are within a CDATA section
4583 *
4584 * parse a CharData section.this is the fallback function
4585 * of xmlParseCharData() when the parsing requires handling
4586 * of non-ASCII characters.
4587 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004588static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004589xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004590 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4591 int nbchar = 0;
4592 int cur, l;
4593 int count = 0;
4594
4595 SHRINK;
4596 GROW;
4597 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004598 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004599 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004600 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004601 if ((cur == ']') && (NXT(1) == ']') &&
4602 (NXT(2) == '>')) {
4603 if (cdata) break;
4604 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004605 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004606 }
4607 }
4608 COPY_BUF(l,buf,nbchar,cur);
4609 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004610 buf[nbchar] = 0;
4611
Owen Taylor3473f882001-02-23 17:55:21 +00004612 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004613 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004614 */
4615 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004616 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004617 if (ctxt->sax->ignorableWhitespace != NULL)
4618 ctxt->sax->ignorableWhitespace(ctxt->userData,
4619 buf, nbchar);
4620 } else {
4621 if (ctxt->sax->characters != NULL)
4622 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004623 if ((ctxt->sax->characters !=
4624 ctxt->sax->ignorableWhitespace) &&
4625 (*ctxt->space == -1))
4626 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004627 }
4628 }
4629 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004630 /* something really bad happened in the SAX callback */
4631 if (ctxt->instate != XML_PARSER_CONTENT)
4632 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004633 }
4634 count++;
4635 if (count > 50) {
4636 GROW;
4637 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004638 if (ctxt->instate == XML_PARSER_EOF)
4639 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004640 }
4641 NEXTL(l);
4642 cur = CUR_CHAR(l);
4643 }
4644 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004645 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004646 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004647 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004648 */
4649 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004650 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004651 if (ctxt->sax->ignorableWhitespace != NULL)
4652 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4653 } else {
4654 if (ctxt->sax->characters != NULL)
4655 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004656 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4657 (*ctxt->space == -1))
4658 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004659 }
4660 }
4661 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004662 if ((cur != 0) && (!IS_CHAR(cur))) {
4663 /* Generate the error and skip the offending character */
4664 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4665 "PCDATA invalid Char value %d\n",
4666 cur);
4667 NEXTL(l);
4668 }
Owen Taylor3473f882001-02-23 17:55:21 +00004669}
4670
4671/**
4672 * xmlParseExternalID:
4673 * @ctxt: an XML parser context
4674 * @publicID: a xmlChar** receiving PubidLiteral
4675 * @strict: indicate whether we should restrict parsing to only
4676 * production [75], see NOTE below
4677 *
4678 * Parse an External ID or a Public ID
4679 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004680 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004681 * 'PUBLIC' S PubidLiteral S SystemLiteral
4682 *
4683 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4684 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4685 *
4686 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4687 *
4688 * Returns the function returns SystemLiteral and in the second
4689 * case publicID receives PubidLiteral, is strict is off
4690 * it is possible to return NULL and have publicID set.
4691 */
4692
4693xmlChar *
4694xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4695 xmlChar *URI = NULL;
4696
4697 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004698
4699 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004700 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004701 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004702 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004703 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4704 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004705 }
4706 SKIP_BLANKS;
4707 URI = xmlParseSystemLiteral(ctxt);
4708 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004709 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004710 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004711 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004712 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004713 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004714 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004715 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004716 }
4717 SKIP_BLANKS;
4718 *publicID = xmlParsePubidLiteral(ctxt);
4719 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004720 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004721 }
4722 if (strict) {
4723 /*
4724 * We don't handle [83] so "S SystemLiteral" is required.
4725 */
William M. Brack76e95df2003-10-18 16:20:14 +00004726 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004727 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004728 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004729 }
4730 } else {
4731 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004732 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004733 * "S SystemLiteral" is not detected. From a purely parsing
4734 * point of view that's a nice mess.
4735 */
4736 const xmlChar *ptr;
4737 GROW;
4738
4739 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004740 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004741
William M. Brack76e95df2003-10-18 16:20:14 +00004742 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004743 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4744 }
4745 SKIP_BLANKS;
4746 URI = xmlParseSystemLiteral(ctxt);
4747 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004748 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004749 }
4750 }
4751 return(URI);
4752}
4753
4754/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004755 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004756 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004757 * @buf: the already parsed part of the buffer
4758 * @len: number of bytes filles in the buffer
4759 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004760 *
4761 * Skip an XML (SGML) comment <!-- .... -->
4762 * The spec says that "For compatibility, the string "--" (double-hyphen)
4763 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004764 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004765 *
4766 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4767 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004768static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004769xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4770 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004771 int q, ql;
4772 int r, rl;
4773 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004774 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004775 int inputid;
4776
4777 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004778
Owen Taylor3473f882001-02-23 17:55:21 +00004779 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004780 len = 0;
4781 size = XML_PARSER_BUFFER_SIZE;
4782 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4783 if (buf == NULL) {
4784 xmlErrMemory(ctxt, NULL);
4785 return;
4786 }
Owen Taylor3473f882001-02-23 17:55:21 +00004787 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004788 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004789 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004790 if (q == 0)
4791 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004792 if (!IS_CHAR(q)) {
4793 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4794 "xmlParseComment: invalid xmlChar value %d\n",
4795 q);
4796 xmlFree (buf);
4797 return;
4798 }
Owen Taylor3473f882001-02-23 17:55:21 +00004799 NEXTL(ql);
4800 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004801 if (r == 0)
4802 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004803 if (!IS_CHAR(r)) {
4804 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4805 "xmlParseComment: invalid xmlChar value %d\n",
4806 q);
4807 xmlFree (buf);
4808 return;
4809 }
Owen Taylor3473f882001-02-23 17:55:21 +00004810 NEXTL(rl);
4811 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004812 if (cur == 0)
4813 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004814 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004815 ((cur != '>') ||
4816 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004817 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004818 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004820 if ((len > XML_MAX_TEXT_LENGTH) &&
4821 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4822 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4823 "Comment too big found", NULL);
4824 xmlFree (buf);
4825 return;
4826 }
Owen Taylor3473f882001-02-23 17:55:21 +00004827 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004828 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004829 size_t new_size;
4830
4831 new_size = size * 2;
4832 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004833 if (new_buf == NULL) {
4834 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004835 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004836 return;
4837 }
William M. Bracka3215c72004-07-31 16:24:01 +00004838 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004839 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004840 }
4841 COPY_BUF(ql,buf,len,q);
4842 q = r;
4843 ql = rl;
4844 r = cur;
4845 rl = l;
4846
4847 count++;
4848 if (count > 50) {
4849 GROW;
4850 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004851 if (ctxt->instate == XML_PARSER_EOF) {
4852 xmlFree(buf);
4853 return;
4854 }
Owen Taylor3473f882001-02-23 17:55:21 +00004855 }
4856 NEXTL(l);
4857 cur = CUR_CHAR(l);
4858 if (cur == 0) {
4859 SHRINK;
4860 GROW;
4861 cur = CUR_CHAR(l);
4862 }
4863 }
4864 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004865 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004866 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004867 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004868 } else if (!IS_CHAR(cur)) {
4869 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4870 "xmlParseComment: invalid xmlChar value %d\n",
4871 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004872 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004873 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004874 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4875 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004876 }
4877 NEXT;
4878 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4879 (!ctxt->disableSAX))
4880 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004881 }
Daniel Veillardda629342007-08-01 07:49:06 +00004882 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004883 return;
4884not_terminated:
4885 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4886 "Comment not terminated\n", NULL);
4887 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004888 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004889}
Daniel Veillardda629342007-08-01 07:49:06 +00004890
Daniel Veillard4c778d82005-01-23 17:37:44 +00004891/**
4892 * xmlParseComment:
4893 * @ctxt: an XML parser context
4894 *
4895 * Skip an XML (SGML) comment <!-- .... -->
4896 * The spec says that "For compatibility, the string "--" (double-hyphen)
4897 * must not occur within comments. "
4898 *
4899 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4900 */
4901void
4902xmlParseComment(xmlParserCtxtPtr ctxt) {
4903 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004904 size_t size = XML_PARSER_BUFFER_SIZE;
4905 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004906 xmlParserInputState state;
4907 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004908 size_t nbchar = 0;
4909 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004910 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004911
4912 /*
4913 * Check that there is a comment right here.
4914 */
4915 if ((RAW != '<') || (NXT(1) != '!') ||
4916 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004917 state = ctxt->instate;
4918 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004919 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004920 SKIP(4);
4921 SHRINK;
4922 GROW;
4923
4924 /*
4925 * Accelerated common case where input don't need to be
4926 * modified before passing it to the handler.
4927 */
4928 in = ctxt->input->cur;
4929 do {
4930 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004931 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004932 ctxt->input->line++; ctxt->input->col = 1;
4933 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004934 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004935 }
4936get_more:
4937 ccol = ctxt->input->col;
4938 while (((*in > '-') && (*in <= 0x7F)) ||
4939 ((*in >= 0x20) && (*in < '-')) ||
4940 (*in == 0x09)) {
4941 in++;
4942 ccol++;
4943 }
4944 ctxt->input->col = ccol;
4945 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004946 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004947 ctxt->input->line++; ctxt->input->col = 1;
4948 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004949 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004950 goto get_more;
4951 }
4952 nbchar = in - ctxt->input->cur;
4953 /*
4954 * save current set of data
4955 */
4956 if (nbchar > 0) {
4957 if ((ctxt->sax != NULL) &&
4958 (ctxt->sax->comment != NULL)) {
4959 if (buf == NULL) {
4960 if ((*in == '-') && (in[1] == '-'))
4961 size = nbchar + 1;
4962 else
4963 size = XML_PARSER_BUFFER_SIZE + nbchar;
4964 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4965 if (buf == NULL) {
4966 xmlErrMemory(ctxt, NULL);
4967 ctxt->instate = state;
4968 return;
4969 }
4970 len = 0;
4971 } else if (len + nbchar + 1 >= size) {
4972 xmlChar *new_buf;
4973 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4974 new_buf = (xmlChar *) xmlRealloc(buf,
4975 size * sizeof(xmlChar));
4976 if (new_buf == NULL) {
4977 xmlFree (buf);
4978 xmlErrMemory(ctxt, NULL);
4979 ctxt->instate = state;
4980 return;
4981 }
4982 buf = new_buf;
4983 }
4984 memcpy(&buf[len], ctxt->input->cur, nbchar);
4985 len += nbchar;
4986 buf[len] = 0;
4987 }
4988 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004989 if ((len > XML_MAX_TEXT_LENGTH) &&
4990 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4991 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4992 "Comment too big found", NULL);
4993 xmlFree (buf);
4994 return;
4995 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004996 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004997 if (*in == 0xA) {
4998 in++;
4999 ctxt->input->line++; ctxt->input->col = 1;
5000 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005001 if (*in == 0xD) {
5002 in++;
5003 if (*in == 0xA) {
5004 ctxt->input->cur = in;
5005 in++;
5006 ctxt->input->line++; ctxt->input->col = 1;
5007 continue; /* while */
5008 }
5009 in--;
5010 }
5011 SHRINK;
5012 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005013 if (ctxt->instate == XML_PARSER_EOF) {
5014 xmlFree(buf);
5015 return;
5016 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005017 in = ctxt->input->cur;
5018 if (*in == '-') {
5019 if (in[1] == '-') {
5020 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005021 if (ctxt->input->id != inputid) {
5022 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5023 "comment doesn't start and stop in the same entity\n");
5024 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005025 SKIP(3);
5026 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5027 (!ctxt->disableSAX)) {
5028 if (buf != NULL)
5029 ctxt->sax->comment(ctxt->userData, buf);
5030 else
5031 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5032 }
5033 if (buf != NULL)
5034 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005035 if (ctxt->instate != XML_PARSER_EOF)
5036 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005037 return;
5038 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005039 if (buf != NULL) {
5040 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5041 "Double hyphen within comment: "
5042 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005043 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005044 } else
5045 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5046 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005047 in++;
5048 ctxt->input->col++;
5049 }
5050 in++;
5051 ctxt->input->col++;
5052 goto get_more;
5053 }
5054 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5055 xmlParseCommentComplex(ctxt, buf, len, size);
5056 ctxt->instate = state;
5057 return;
5058}
5059
Owen Taylor3473f882001-02-23 17:55:21 +00005060
5061/**
5062 * xmlParsePITarget:
5063 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005064 *
Owen Taylor3473f882001-02-23 17:55:21 +00005065 * parse the name of a PI
5066 *
5067 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5068 *
5069 * Returns the PITarget name or NULL
5070 */
5071
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005072const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005073xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005074 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005075
5076 name = xmlParseName(ctxt);
5077 if ((name != NULL) &&
5078 ((name[0] == 'x') || (name[0] == 'X')) &&
5079 ((name[1] == 'm') || (name[1] == 'M')) &&
5080 ((name[2] == 'l') || (name[2] == 'L'))) {
5081 int i;
5082 if ((name[0] == 'x') && (name[1] == 'm') &&
5083 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005084 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005085 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005086 return(name);
5087 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005088 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005089 return(name);
5090 }
5091 for (i = 0;;i++) {
5092 if (xmlW3CPIs[i] == NULL) break;
5093 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5094 return(name);
5095 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005096 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5097 "xmlParsePITarget: invalid name prefix 'xml'\n",
5098 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005099 }
Daniel Veillard37334572008-07-31 08:20:02 +00005100 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005101 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005102 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5103 }
Owen Taylor3473f882001-02-23 17:55:21 +00005104 return(name);
5105}
5106
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * @catalog is expected to hold exactly one pseudo-attribute of the form
 * catalog="URL" or catalog='URL', optionally surrounded by blanks.  Any
 * other content produces an XML_WAR_CATALOG_PI warning and leaves the
 * context catalogs untouched.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /*
     * NOTE(review): a missing '=' is ignored silently whereas every other
     * syntax problem emits a warning - confirm this is intended.
     */
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* the value is delimited by a matching pair of ' or " */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5168
Owen Taylor3473f882001-02-23 17:55:21 +00005169/**
5170 * xmlParsePI:
5171 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005172 *
Owen Taylor3473f882001-02-23 17:55:21 +00005173 * parse an XML Processing Instruction.
5174 *
5175 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5176 *
5177 * The processing is transfered to SAX once parsed.
5178 */
5179
5180void
5181xmlParsePI(xmlParserCtxtPtr ctxt) {
5182 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005183 size_t len = 0;
5184 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005185 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005186 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005187 xmlParserInputState state;
5188 int count = 0;
5189
5190 if ((RAW == '<') && (NXT(1) == '?')) {
5191 xmlParserInputPtr input = ctxt->input;
5192 state = ctxt->instate;
5193 ctxt->instate = XML_PARSER_PI;
5194 /*
5195 * this is a Processing Instruction.
5196 */
5197 SKIP(2);
5198 SHRINK;
5199
5200 /*
5201 * Parse the target name and check for special support like
5202 * namespace.
5203 */
5204 target = xmlParsePITarget(ctxt);
5205 if (target != NULL) {
5206 if ((RAW == '?') && (NXT(1) == '>')) {
5207 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005208 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5209 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005210 }
5211 SKIP(2);
5212
5213 /*
5214 * SAX: PI detected.
5215 */
5216 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5217 (ctxt->sax->processingInstruction != NULL))
5218 ctxt->sax->processingInstruction(ctxt->userData,
5219 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005220 if (ctxt->instate != XML_PARSER_EOF)
5221 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005222 return;
5223 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005224 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005225 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005226 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005227 ctxt->instate = state;
5228 return;
5229 }
5230 cur = CUR;
5231 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005232 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5233 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005234 }
5235 SKIP_BLANKS;
5236 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005237 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005238 ((cur != '?') || (NXT(1) != '>'))) {
5239 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005240 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005241 size_t new_size = size * 2;
5242 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005243 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005244 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005245 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005246 ctxt->instate = state;
5247 return;
5248 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005249 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005250 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005251 }
5252 count++;
5253 if (count > 50) {
5254 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005255 if (ctxt->instate == XML_PARSER_EOF) {
5256 xmlFree(buf);
5257 return;
5258 }
Owen Taylor3473f882001-02-23 17:55:21 +00005259 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005260 if ((len > XML_MAX_TEXT_LENGTH) &&
5261 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5262 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5263 "PI %s too big found", target);
5264 xmlFree(buf);
5265 ctxt->instate = state;
5266 return;
5267 }
Owen Taylor3473f882001-02-23 17:55:21 +00005268 }
5269 COPY_BUF(l,buf,len,cur);
5270 NEXTL(l);
5271 cur = CUR_CHAR(l);
5272 if (cur == 0) {
5273 SHRINK;
5274 GROW;
5275 cur = CUR_CHAR(l);
5276 }
5277 }
Daniel Veillard51304812012-07-19 20:34:26 +08005278 if ((len > XML_MAX_TEXT_LENGTH) &&
5279 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5280 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5281 "PI %s too big found", target);
5282 xmlFree(buf);
5283 ctxt->instate = state;
5284 return;
5285 }
Owen Taylor3473f882001-02-23 17:55:21 +00005286 buf[len] = 0;
5287 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005288 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5289 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005290 } else {
5291 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005292 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5293 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005294 }
5295 SKIP(2);
5296
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005297#ifdef LIBXML_CATALOG_ENABLED
5298 if (((state == XML_PARSER_MISC) ||
5299 (state == XML_PARSER_START)) &&
5300 (xmlStrEqual(target, XML_CATALOG_PI))) {
5301 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5302 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5303 (allow == XML_CATA_ALLOW_ALL))
5304 xmlParseCatalogPI(ctxt, buf);
5305 }
5306#endif
5307
5308
Owen Taylor3473f882001-02-23 17:55:21 +00005309 /*
5310 * SAX: PI detected.
5311 */
5312 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5313 (ctxt->sax->processingInstruction != NULL))
5314 ctxt->sax->processingInstruction(ctxt->userData,
5315 target, buf);
5316 }
5317 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005318 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005319 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005320 }
Chris Evans77404b82011-12-14 16:18:25 +08005321 if (ctxt->instate != XML_PARSER_EOF)
5322 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005323 }
5324}
5325
5326/**
5327 * xmlParseNotationDecl:
5328 * @ctxt: an XML parser context
5329 *
5330 * parse a notation declaration
5331 *
5332 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5333 *
5334 * Hence there is actually 3 choices:
5335 * 'PUBLIC' S PubidLiteral
5336 * 'PUBLIC' S PubidLiteral S SystemLiteral
5337 * and 'SYSTEM' S SystemLiteral
5338 *
5339 * See the NOTE on xmlParseExternalID().
5340 */
5341
5342void
5343xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005344 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005345 xmlChar *Pubid;
5346 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005347
Daniel Veillarda07050d2003-10-19 14:46:32 +00005348 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005349 xmlParserInputPtr input = ctxt->input;
5350 SHRINK;
5351 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005352 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005353 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5354 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005355 return;
5356 }
5357 SKIP_BLANKS;
5358
Daniel Veillard76d66f42001-05-16 21:05:17 +00005359 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005361 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005362 return;
5363 }
William M. Brack76e95df2003-10-18 16:20:14 +00005364 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005365 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005366 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005367 return;
5368 }
Daniel Veillard37334572008-07-31 08:20:02 +00005369 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005370 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005371 "colon are forbidden from notation names '%s'\n",
5372 name, NULL, NULL);
5373 }
Owen Taylor3473f882001-02-23 17:55:21 +00005374 SKIP_BLANKS;
5375
5376 /*
5377 * Parse the IDs.
5378 */
5379 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5380 SKIP_BLANKS;
5381
5382 if (RAW == '>') {
5383 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005384 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5385 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005386 }
5387 NEXT;
5388 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5389 (ctxt->sax->notationDecl != NULL))
5390 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5391 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005392 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005393 }
Owen Taylor3473f882001-02-23 17:55:21 +00005394 if (Systemid != NULL) xmlFree(Systemid);
5395 if (Pubid != NULL) xmlFree(Pubid);
5396 }
5397}
5398
5399/**
5400 * xmlParseEntityDecl:
5401 * @ctxt: an XML parser context
5402 *
5403 * parse <!ENTITY declarations
5404 *
5405 * [70] EntityDecl ::= GEDecl | PEDecl
5406 *
5407 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5408 *
5409 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5410 *
5411 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5412 *
5413 * [74] PEDef ::= EntityValue | ExternalID
5414 *
5415 * [76] NDataDecl ::= S 'NDATA' S Name
5416 *
5417 * [ VC: Notation Declared ]
5418 * The Name must match the declared name of a notation.
5419 */
5420
5421void
5422xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005423 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005424 xmlChar *value = NULL;
5425 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005426 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005427 int isParameter = 0;
5428 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005429 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005430
Daniel Veillard4c778d82005-01-23 17:37:44 +00005431 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005432 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005433 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005434 SHRINK;
5435 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005436 skipped = SKIP_BLANKS;
5437 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005438 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5439 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005440 }
Owen Taylor3473f882001-02-23 17:55:21 +00005441
5442 if (RAW == '%') {
5443 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005444 skipped = SKIP_BLANKS;
5445 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005446 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5447 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005448 }
Owen Taylor3473f882001-02-23 17:55:21 +00005449 isParameter = 1;
5450 }
5451
Daniel Veillard76d66f42001-05-16 21:05:17 +00005452 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005453 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005454 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5455 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005456 return;
5457 }
Daniel Veillard37334572008-07-31 08:20:02 +00005458 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005459 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005460 "colon are forbidden from entities names '%s'\n",
5461 name, NULL, NULL);
5462 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005463 skipped = SKIP_BLANKS;
5464 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005465 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5466 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005467 }
Owen Taylor3473f882001-02-23 17:55:21 +00005468
Daniel Veillardf5582f12002-06-11 10:08:16 +00005469 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005470 /*
5471 * handle the various case of definitions...
5472 */
5473 if (isParameter) {
5474 if ((RAW == '"') || (RAW == '\'')) {
5475 value = xmlParseEntityValue(ctxt, &orig);
5476 if (value) {
5477 if ((ctxt->sax != NULL) &&
5478 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5479 ctxt->sax->entityDecl(ctxt->userData, name,
5480 XML_INTERNAL_PARAMETER_ENTITY,
5481 NULL, NULL, value);
5482 }
5483 } else {
5484 URI = xmlParseExternalID(ctxt, &literal, 1);
5485 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005486 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005487 }
5488 if (URI) {
5489 xmlURIPtr uri;
5490
5491 uri = xmlParseURI((const char *) URI);
5492 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005493 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5494 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005495 /*
5496 * This really ought to be a well formedness error
5497 * but the XML Core WG decided otherwise c.f. issue
5498 * E26 of the XML erratas.
5499 */
Owen Taylor3473f882001-02-23 17:55:21 +00005500 } else {
5501 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005502 /*
5503 * Okay this is foolish to block those but not
5504 * invalid URIs.
5505 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005506 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005507 } else {
5508 if ((ctxt->sax != NULL) &&
5509 (!ctxt->disableSAX) &&
5510 (ctxt->sax->entityDecl != NULL))
5511 ctxt->sax->entityDecl(ctxt->userData, name,
5512 XML_EXTERNAL_PARAMETER_ENTITY,
5513 literal, URI, NULL);
5514 }
5515 xmlFreeURI(uri);
5516 }
5517 }
5518 }
5519 } else {
5520 if ((RAW == '"') || (RAW == '\'')) {
5521 value = xmlParseEntityValue(ctxt, &orig);
5522 if ((ctxt->sax != NULL) &&
5523 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5524 ctxt->sax->entityDecl(ctxt->userData, name,
5525 XML_INTERNAL_GENERAL_ENTITY,
5526 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005527 /*
5528 * For expat compatibility in SAX mode.
5529 */
5530 if ((ctxt->myDoc == NULL) ||
5531 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5532 if (ctxt->myDoc == NULL) {
5533 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005534 if (ctxt->myDoc == NULL) {
5535 xmlErrMemory(ctxt, "New Doc failed");
5536 return;
5537 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005538 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005539 }
5540 if (ctxt->myDoc->intSubset == NULL)
5541 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5542 BAD_CAST "fake", NULL, NULL);
5543
Daniel Veillard1af9a412003-08-20 22:54:39 +00005544 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5545 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005546 }
Owen Taylor3473f882001-02-23 17:55:21 +00005547 } else {
5548 URI = xmlParseExternalID(ctxt, &literal, 1);
5549 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005550 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005551 }
5552 if (URI) {
5553 xmlURIPtr uri;
5554
5555 uri = xmlParseURI((const char *)URI);
5556 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005557 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5558 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005559 /*
5560 * This really ought to be a well formedness error
5561 * but the XML Core WG decided otherwise c.f. issue
5562 * E26 of the XML erratas.
5563 */
Owen Taylor3473f882001-02-23 17:55:21 +00005564 } else {
5565 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005566 /*
5567 * Okay this is foolish to block those but not
5568 * invalid URIs.
5569 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005570 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005571 }
5572 xmlFreeURI(uri);
5573 }
5574 }
William M. Brack76e95df2003-10-18 16:20:14 +00005575 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005576 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5577 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005578 }
5579 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005580 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005581 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005582 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005583 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5584 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005585 }
5586 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005587 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005588 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5589 (ctxt->sax->unparsedEntityDecl != NULL))
5590 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5591 literal, URI, ndata);
5592 } else {
5593 if ((ctxt->sax != NULL) &&
5594 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5595 ctxt->sax->entityDecl(ctxt->userData, name,
5596 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5597 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005598 /*
5599 * For expat compatibility in SAX mode.
5600 * assuming the entity repalcement was asked for
5601 */
5602 if ((ctxt->replaceEntities != 0) &&
5603 ((ctxt->myDoc == NULL) ||
5604 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5605 if (ctxt->myDoc == NULL) {
5606 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005607 if (ctxt->myDoc == NULL) {
5608 xmlErrMemory(ctxt, "New Doc failed");
5609 return;
5610 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005611 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005612 }
5613
5614 if (ctxt->myDoc->intSubset == NULL)
5615 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5616 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005617 xmlSAX2EntityDecl(ctxt, name,
5618 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5619 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005620 }
Owen Taylor3473f882001-02-23 17:55:21 +00005621 }
5622 }
5623 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005624 if (ctxt->instate == XML_PARSER_EOF)
5625 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005626 SKIP_BLANKS;
5627 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005628 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005629 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005630 } else {
5631 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005632 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5633 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005634 }
5635 NEXT;
5636 }
5637 if (orig != NULL) {
5638 /*
5639 * Ugly mechanism to save the raw entity value.
5640 */
5641 xmlEntityPtr cur = NULL;
5642
5643 if (isParameter) {
5644 if ((ctxt->sax != NULL) &&
5645 (ctxt->sax->getParameterEntity != NULL))
5646 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5647 } else {
5648 if ((ctxt->sax != NULL) &&
5649 (ctxt->sax->getEntity != NULL))
5650 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005651 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005652 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005653 }
Owen Taylor3473f882001-02-23 17:55:21 +00005654 }
5655 if (cur != NULL) {
5656 if (cur->orig != NULL)
5657 xmlFree(orig);
5658 else
5659 cur->orig = orig;
5660 } else
5661 xmlFree(orig);
5662 }
Owen Taylor3473f882001-02-23 17:55:21 +00005663 if (value != NULL) xmlFree(value);
5664 if (URI != NULL) xmlFree(URI);
5665 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005666 }
5667}
5668
5669/**
5670 * xmlParseDefaultDecl:
5671 * @ctxt: an XML parser context
5672 * @value: Receive a possible fixed default value for the attribute
5673 *
5674 * Parse an attribute default declaration
5675 *
5676 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5677 *
5678 * [ VC: Required Attribute ]
5679 * if the default declaration is the keyword #REQUIRED, then the
5680 * attribute must be specified for all elements of the type in the
5681 * attribute-list declaration.
5682 *
5683 * [ VC: Attribute Default Legal ]
5684 * The declared default value must meet the lexical constraints of
5685 * the declared attribute type c.f. xmlValidateAttributeDecl()
5686 *
5687 * [ VC: Fixed Attribute Default ]
5688 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005689 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005690 *
5691 * [ WFC: No < in Attribute Values ]
5692 * handled in xmlParseAttValue()
5693 *
5694 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005695 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005696 */
5697
5698int
5699xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5700 int val;
5701 xmlChar *ret;
5702
5703 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005704 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005705 SKIP(9);
5706 return(XML_ATTRIBUTE_REQUIRED);
5707 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005708 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005709 SKIP(8);
5710 return(XML_ATTRIBUTE_IMPLIED);
5711 }
5712 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005713 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005714 SKIP(6);
5715 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005716 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005717 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5718 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005719 }
5720 SKIP_BLANKS;
5721 }
5722 ret = xmlParseAttValue(ctxt);
5723 ctxt->instate = XML_PARSER_DTD;
5724 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005725 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005726 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005727 } else
5728 *value = ret;
5729 return(val);
5730}
5731
5732/**
5733 * xmlParseNotationType:
5734 * @ctxt: an XML parser context
5735 *
5736 * parse an Notation attribute type.
5737 *
5738 * Note: the leading 'NOTATION' S part has already being parsed...
5739 *
5740 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5741 *
5742 * [ VC: Notation Attributes ]
5743 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005744 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005745 *
5746 * Returns: the notation attribute tree built while parsing
5747 */
5748
5749xmlEnumerationPtr
5750xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005751 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005752 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005753
5754 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005755 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005756 return(NULL);
5757 }
5758 SHRINK;
5759 do {
5760 NEXT;
5761 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005762 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005763 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005764 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5765 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005766 xmlFreeEnumeration(ret);
5767 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005768 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005769 tmp = ret;
5770 while (tmp != NULL) {
5771 if (xmlStrEqual(name, tmp->name)) {
5772 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5773 "standalone: attribute notation value token %s duplicated\n",
5774 name, NULL);
5775 if (!xmlDictOwns(ctxt->dict, name))
5776 xmlFree((xmlChar *) name);
5777 break;
5778 }
5779 tmp = tmp->next;
5780 }
5781 if (tmp == NULL) {
5782 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005783 if (cur == NULL) {
5784 xmlFreeEnumeration(ret);
5785 return(NULL);
5786 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005787 if (last == NULL) ret = last = cur;
5788 else {
5789 last->next = cur;
5790 last = cur;
5791 }
Owen Taylor3473f882001-02-23 17:55:21 +00005792 }
5793 SKIP_BLANKS;
5794 } while (RAW == '|');
5795 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005796 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005797 xmlFreeEnumeration(ret);
5798 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005799 }
5800 NEXT;
5801 return(ret);
5802}
5803
5804/**
5805 * xmlParseEnumerationType:
5806 * @ctxt: an XML parser context
5807 *
5808 * parse an Enumeration attribute type.
5809 *
5810 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5811 *
5812 * [ VC: Enumeration ]
5813 * Values of this type must match one of the Nmtoken tokens in
5814 * the declaration
5815 *
5816 * Returns: the enumeration attribute tree built while parsing
5817 */
5818
5819xmlEnumerationPtr
5820xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5821 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005822 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005823
5824 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005825 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005826 return(NULL);
5827 }
5828 SHRINK;
5829 do {
5830 NEXT;
5831 SKIP_BLANKS;
5832 name = xmlParseNmtoken(ctxt);
5833 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005834 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005835 return(ret);
5836 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005837 tmp = ret;
5838 while (tmp != NULL) {
5839 if (xmlStrEqual(name, tmp->name)) {
5840 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5841 "standalone: attribute enumeration value token %s duplicated\n",
5842 name, NULL);
5843 if (!xmlDictOwns(ctxt->dict, name))
5844 xmlFree(name);
5845 break;
5846 }
5847 tmp = tmp->next;
5848 }
5849 if (tmp == NULL) {
5850 cur = xmlCreateEnumeration(name);
5851 if (!xmlDictOwns(ctxt->dict, name))
5852 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005853 if (cur == NULL) {
5854 xmlFreeEnumeration(ret);
5855 return(NULL);
5856 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005857 if (last == NULL) ret = last = cur;
5858 else {
5859 last->next = cur;
5860 last = cur;
5861 }
Owen Taylor3473f882001-02-23 17:55:21 +00005862 }
5863 SKIP_BLANKS;
5864 } while (RAW == '|');
5865 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005866 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005867 return(ret);
5868 }
5869 NEXT;
5870 return(ret);
5871}
5872
5873/**
5874 * xmlParseEnumeratedType:
5875 * @ctxt: an XML parser context
5876 * @tree: the enumeration tree built while parsing
5877 *
5878 * parse an Enumerated attribute type.
5879 *
5880 * [57] EnumeratedType ::= NotationType | Enumeration
5881 *
5882 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5883 *
5884 *
5885 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5886 */
5887
5888int
5889xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005890 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005891 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005892 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005893 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5894 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005895 return(0);
5896 }
5897 SKIP_BLANKS;
5898 *tree = xmlParseNotationType(ctxt);
5899 if (*tree == NULL) return(0);
5900 return(XML_ATTRIBUTE_NOTATION);
5901 }
5902 *tree = xmlParseEnumerationType(ctxt);
5903 if (*tree == NULL) return(0);
5904 return(XML_ATTRIBUTE_ENUMERATION);
5905}
5906
5907/**
5908 * xmlParseAttributeType:
5909 * @ctxt: an XML parser context
5910 * @tree: the enumeration tree built while parsing
5911 *
5912 * parse the Attribute list def for an element
5913 *
5914 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5915 *
5916 * [55] StringType ::= 'CDATA'
5917 *
5918 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5919 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5920 *
5921 * Validity constraints for attribute values syntax are checked in
5922 * xmlValidateAttributeValue()
5923 *
5924 * [ VC: ID ]
5925 * Values of type ID must match the Name production. A name must not
5926 * appear more than once in an XML document as a value of this type;
5927 * i.e., ID values must uniquely identify the elements which bear them.
5928 *
5929 * [ VC: One ID per Element Type ]
5930 * No element type may have more than one ID attribute specified.
5931 *
5932 * [ VC: ID Attribute Default ]
5933 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5934 *
5935 * [ VC: IDREF ]
5936 * Values of type IDREF must match the Name production, and values
5937 * of type IDREFS must match Names; each IDREF Name must match the value
5938 * of an ID attribute on some element in the XML document; i.e. IDREF
5939 * values must match the value of some ID attribute.
5940 *
5941 * [ VC: Entity Name ]
5942 * Values of type ENTITY must match the Name production, values
5943 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005944 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005945 *
5946 * [ VC: Name Token ]
5947 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005948 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005949 *
5950 * Returns the attribute type
5951 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005952int
Owen Taylor3473f882001-02-23 17:55:21 +00005953xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5954 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005955 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005956 SKIP(5);
5957 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005958 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005959 SKIP(6);
5960 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005961 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005962 SKIP(5);
5963 return(XML_ATTRIBUTE_IDREF);
5964 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5965 SKIP(2);
5966 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005967 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005968 SKIP(6);
5969 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005970 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005971 SKIP(8);
5972 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005973 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005974 SKIP(8);
5975 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005976 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005977 SKIP(7);
5978 return(XML_ATTRIBUTE_NMTOKEN);
5979 }
5980 return(xmlParseEnumeratedType(ctxt, tree));
5981}
5982
5983/**
5984 * xmlParseAttributeListDecl:
5985 * @ctxt: an XML parser context
5986 *
5987 * : parse the Attribute list def for an element
5988 *
5989 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5990 *
5991 * [53] AttDef ::= S Name S AttType S DefaultDecl
5992 *
5993 */
5994void
5995xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005996 const xmlChar *elemName;
5997 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005998 xmlEnumerationPtr tree;
5999
Daniel Veillarda07050d2003-10-19 14:46:32 +00006000 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006001 xmlParserInputPtr input = ctxt->input;
6002
6003 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006004 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006005 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006006 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006007 }
6008 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006009 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006010 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006011 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6012 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006013 return;
6014 }
6015 SKIP_BLANKS;
6016 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006017 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006018 const xmlChar *check = CUR_PTR;
6019 int type;
6020 int def;
6021 xmlChar *defaultValue = NULL;
6022
6023 GROW;
6024 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006025 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006026 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006027 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6028 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006029 break;
6030 }
6031 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006032 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006033 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006034 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006035 break;
6036 }
6037 SKIP_BLANKS;
6038
6039 type = xmlParseAttributeType(ctxt, &tree);
6040 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006041 break;
6042 }
6043
6044 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006045 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6047 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006048 if (tree != NULL)
6049 xmlFreeEnumeration(tree);
6050 break;
6051 }
6052 SKIP_BLANKS;
6053
6054 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6055 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006056 if (defaultValue != NULL)
6057 xmlFree(defaultValue);
6058 if (tree != NULL)
6059 xmlFreeEnumeration(tree);
6060 break;
6061 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006062 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6063 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006064
6065 GROW;
6066 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006067 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006068 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006069 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006070 if (defaultValue != NULL)
6071 xmlFree(defaultValue);
6072 if (tree != NULL)
6073 xmlFreeEnumeration(tree);
6074 break;
6075 }
6076 SKIP_BLANKS;
6077 }
6078 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006079 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6080 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006081 if (defaultValue != NULL)
6082 xmlFree(defaultValue);
6083 if (tree != NULL)
6084 xmlFreeEnumeration(tree);
6085 break;
6086 }
6087 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6088 (ctxt->sax->attributeDecl != NULL))
6089 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6090 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006091 else if (tree != NULL)
6092 xmlFreeEnumeration(tree);
6093
6094 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006095 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006096 (def != XML_ATTRIBUTE_REQUIRED)) {
6097 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6098 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006099 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006100 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6101 }
Owen Taylor3473f882001-02-23 17:55:21 +00006102 if (defaultValue != NULL)
6103 xmlFree(defaultValue);
6104 GROW;
6105 }
6106 if (RAW == '>') {
6107 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006108 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6109 "Attribute list declaration doesn't start and stop in the same entity\n",
6110 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006111 }
6112 NEXT;
6113 }
Owen Taylor3473f882001-02-23 17:55:21 +00006114 }
6115}
6116
6117/**
6118 * xmlParseElementMixedContentDecl:
6119 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006120 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006121 *
6122 * parse the declaration for a Mixed Element content
6123 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006124 *
Owen Taylor3473f882001-02-23 17:55:21 +00006125 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6126 * '(' S? '#PCDATA' S? ')'
6127 *
6128 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6129 *
6130 * [ VC: No Duplicate Types ]
6131 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006132 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006133 *
6134 * returns: the list of the xmlElementContentPtr describing the element choices
6135 */
6136xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006137xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006138 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006139 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006140
6141 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006142 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006143 SKIP(7);
6144 SKIP_BLANKS;
6145 SHRINK;
6146 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006147 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006148 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6149"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006150 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006151 }
Owen Taylor3473f882001-02-23 17:55:21 +00006152 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006153 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006154 if (ret == NULL)
6155 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 if (RAW == '*') {
6157 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6158 NEXT;
6159 }
6160 return(ret);
6161 }
6162 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006163 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006164 if (ret == NULL) return(NULL);
6165 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006166 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006167 NEXT;
6168 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006169 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006170 if (ret == NULL) return(NULL);
6171 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006172 if (cur != NULL)
6173 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006174 cur = ret;
6175 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006176 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006177 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006178 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006179 if (n->c1 != NULL)
6180 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006181 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006182 if (n != NULL)
6183 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006184 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006185 }
6186 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006187 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006188 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006189 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006190 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006191 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006192 return(NULL);
6193 }
6194 SKIP_BLANKS;
6195 GROW;
6196 }
6197 if ((RAW == ')') && (NXT(1) == '*')) {
6198 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006199 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006200 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006201 if (cur->c2 != NULL)
6202 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006203 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006204 if (ret != NULL)
6205 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006206 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006207 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6208"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006209 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006210 }
Owen Taylor3473f882001-02-23 17:55:21 +00006211 SKIP(2);
6212 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006213 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006214 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006215 return(NULL);
6216 }
6217
6218 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006219 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006220 }
6221 return(ret);
6222}
6223
6224/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006225 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006226 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006227 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006228 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006229 *
6230 * parse the declaration for a Mixed Element content
6231 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006232 *
Owen Taylor3473f882001-02-23 17:55:21 +00006233 *
6234 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6235 *
6236 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6237 *
6238 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6239 *
6240 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6241 *
6242 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6243 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006244 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006245 * opening or closing parentheses in a choice, seq, or Mixed
6246 * construct is contained in the replacement text for a parameter
6247 * entity, both must be contained in the same replacement text. For
6248 * interoperability, if a parameter-entity reference appears in a
6249 * choice, seq, or Mixed construct, its replacement text should not
6250 * be empty, and neither the first nor last non-blank character of
6251 * the replacement text should be a connector (| or ,).
6252 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006253 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006254 * hierarchy.
6255 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006256static xmlElementContentPtr
6257xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6258 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006259 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006260 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006261 xmlChar type = 0;
6262
Daniel Veillard489f9672009-08-10 16:49:30 +02006263 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6264 (depth > 2048)) {
6265 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6266"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6267 depth);
6268 return(NULL);
6269 }
Owen Taylor3473f882001-02-23 17:55:21 +00006270 SKIP_BLANKS;
6271 GROW;
6272 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006273 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006274
Owen Taylor3473f882001-02-23 17:55:21 +00006275 /* Recurse on first child */
6276 NEXT;
6277 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006278 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6279 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006280 SKIP_BLANKS;
6281 GROW;
6282 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006283 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006284 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006285 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006286 return(NULL);
6287 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006288 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006289 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006290 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006291 return(NULL);
6292 }
Owen Taylor3473f882001-02-23 17:55:21 +00006293 GROW;
6294 if (RAW == '?') {
6295 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6296 NEXT;
6297 } else if (RAW == '*') {
6298 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6299 NEXT;
6300 } else if (RAW == '+') {
6301 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6302 NEXT;
6303 } else {
6304 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6305 }
Owen Taylor3473f882001-02-23 17:55:21 +00006306 GROW;
6307 }
6308 SKIP_BLANKS;
6309 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006310 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006311 /*
6312 * Each loop we parse one separator and one element.
6313 */
6314 if (RAW == ',') {
6315 if (type == 0) type = CUR;
6316
6317 /*
6318 * Detect "Name | Name , Name" error
6319 */
6320 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006321 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006322 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006323 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006324 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006325 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006326 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006327 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006328 return(NULL);
6329 }
6330 NEXT;
6331
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006332 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006333 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006334 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006335 xmlFreeDocElementContent(ctxt->myDoc, last);
6336 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 return(NULL);
6338 }
6339 if (last == NULL) {
6340 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006341 if (ret != NULL)
6342 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006343 ret = cur = op;
6344 } else {
6345 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006346 if (op != NULL)
6347 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006348 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006349 if (last != NULL)
6350 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006351 cur =op;
6352 last = NULL;
6353 }
6354 } else if (RAW == '|') {
6355 if (type == 0) type = CUR;
6356
6357 /*
6358 * Detect "Name , Name | Name" error
6359 */
6360 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006361 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006362 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006363 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006364 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006365 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006366 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006367 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006368 return(NULL);
6369 }
6370 NEXT;
6371
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006372 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006373 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006374 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006375 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006376 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006377 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006378 return(NULL);
6379 }
6380 if (last == NULL) {
6381 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006382 if (ret != NULL)
6383 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006384 ret = cur = op;
6385 } else {
6386 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006387 if (op != NULL)
6388 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006389 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006390 if (last != NULL)
6391 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006392 cur =op;
6393 last = NULL;
6394 }
6395 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006396 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006397 if ((last != NULL) && (last != ret))
6398 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006399 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006400 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006401 return(NULL);
6402 }
6403 GROW;
6404 SKIP_BLANKS;
6405 GROW;
6406 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006407 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006408 /* Recurse on second child */
6409 NEXT;
6410 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006411 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6412 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006413 SKIP_BLANKS;
6414 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006415 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006416 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006417 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006418 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006419 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006420 return(NULL);
6421 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006422 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006423 if (last == NULL) {
6424 if (ret != NULL)
6425 xmlFreeDocElementContent(ctxt->myDoc, ret);
6426 return(NULL);
6427 }
Owen Taylor3473f882001-02-23 17:55:21 +00006428 if (RAW == '?') {
6429 last->ocur = XML_ELEMENT_CONTENT_OPT;
6430 NEXT;
6431 } else if (RAW == '*') {
6432 last->ocur = XML_ELEMENT_CONTENT_MULT;
6433 NEXT;
6434 } else if (RAW == '+') {
6435 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6436 NEXT;
6437 } else {
6438 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6439 }
6440 }
6441 SKIP_BLANKS;
6442 GROW;
6443 }
6444 if ((cur != NULL) && (last != NULL)) {
6445 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006446 if (last != NULL)
6447 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006448 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006449 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006450 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6451"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006452 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006453 }
Owen Taylor3473f882001-02-23 17:55:21 +00006454 NEXT;
6455 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006456 if (ret != NULL) {
6457 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6458 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6459 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6460 else
6461 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6462 }
Owen Taylor3473f882001-02-23 17:55:21 +00006463 NEXT;
6464 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006465 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006466 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006467 cur = ret;
6468 /*
6469 * Some normalization:
6470 * (a | b* | c?)* == (a | b | c)*
6471 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006472 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006473 if ((cur->c1 != NULL) &&
6474 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6475 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6476 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6477 if ((cur->c2 != NULL) &&
6478 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6479 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6480 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6481 cur = cur->c2;
6482 }
6483 }
Owen Taylor3473f882001-02-23 17:55:21 +00006484 NEXT;
6485 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006486 if (ret != NULL) {
6487 int found = 0;
6488
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006489 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6490 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6491 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006492 else
6493 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006494 /*
6495 * Some normalization:
6496 * (a | b*)+ == (a | b)*
6497 * (a | b?)+ == (a | b)*
6498 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006499 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006500 if ((cur->c1 != NULL) &&
6501 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6502 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6503 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6504 found = 1;
6505 }
6506 if ((cur->c2 != NULL) &&
6507 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6508 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6509 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6510 found = 1;
6511 }
6512 cur = cur->c2;
6513 }
6514 if (found)
6515 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6516 }
Owen Taylor3473f882001-02-23 17:55:21 +00006517 NEXT;
6518 }
6519 return(ret);
6520}
6521
6522/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006523 * xmlParseElementChildrenContentDecl:
6524 * @ctxt: an XML parser context
6525 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006526 *
6527 * parse the declaration for a Mixed Element content
6528 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6529 *
6530 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6531 *
6532 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6533 *
6534 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6535 *
6536 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6537 *
6538 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6539 * TODO Parameter-entity replacement text must be properly nested
6540 * with parenthesized groups. That is to say, if either of the
6541 * opening or closing parentheses in a choice, seq, or Mixed
6542 * construct is contained in the replacement text for a parameter
6543 * entity, both must be contained in the same replacement text. For
6544 * interoperability, if a parameter-entity reference appears in a
6545 * choice, seq, or Mixed construct, its replacement text should not
6546 * be empty, and neither the first nor last non-blank character of
6547 * the replacement text should be a connector (| or ,).
6548 *
6549 * Returns the tree of xmlElementContentPtr describing the element
6550 * hierarchy.
6551 */
6552xmlElementContentPtr
6553xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6554 /* stub left for API/ABI compat */
6555 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6556}
6557
6558/**
Owen Taylor3473f882001-02-23 17:55:21 +00006559 * xmlParseElementContentDecl:
6560 * @ctxt: an XML parser context
6561 * @name: the name of the element being defined.
6562 * @result: the Element Content pointer will be stored here if any
6563 *
6564 * parse the declaration for an Element content either Mixed or Children,
6565 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006566 *
Owen Taylor3473f882001-02-23 17:55:21 +00006567 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6568 *
6569 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6570 */
6571
6572int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006573xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006574 xmlElementContentPtr *result) {
6575
6576 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006577 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006578 int res;
6579
6580 *result = NULL;
6581
6582 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006583 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006584 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006585 return(-1);
6586 }
6587 NEXT;
6588 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006589 if (ctxt->instate == XML_PARSER_EOF)
6590 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006591 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006592 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006593 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006594 res = XML_ELEMENT_TYPE_MIXED;
6595 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006596 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006597 res = XML_ELEMENT_TYPE_ELEMENT;
6598 }
Owen Taylor3473f882001-02-23 17:55:21 +00006599 SKIP_BLANKS;
6600 *result = tree;
6601 return(res);
6602}
6603
6604/**
6605 * xmlParseElementDecl:
6606 * @ctxt: an XML parser context
6607 *
6608 * parse an Element declaration.
6609 *
6610 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6611 *
6612 * [ VC: Unique Element Type Declaration ]
6613 * No element type may be declared more than once
6614 *
6615 * Returns the type of the element, or -1 in case of error
6616 */
6617int
6618xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006619 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006620 int ret = -1;
6621 xmlElementContentPtr content = NULL;
6622
Daniel Veillard4c778d82005-01-23 17:37:44 +00006623 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006624 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006625 xmlParserInputPtr input = ctxt->input;
6626
6627 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006628 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006629 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6630 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006631 }
6632 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006633 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006634 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006635 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6636 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006637 return(-1);
6638 }
6639 while ((RAW == 0) && (ctxt->inputNr > 1))
6640 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006641 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006642 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6643 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006644 }
6645 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006646 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006647 SKIP(5);
6648 /*
6649 * Element must always be empty.
6650 */
6651 ret = XML_ELEMENT_TYPE_EMPTY;
6652 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6653 (NXT(2) == 'Y')) {
6654 SKIP(3);
6655 /*
6656 * Element is a generic container.
6657 */
6658 ret = XML_ELEMENT_TYPE_ANY;
6659 } else if (RAW == '(') {
6660 ret = xmlParseElementContentDecl(ctxt, name, &content);
6661 } else {
6662 /*
6663 * [ WFC: PEs in Internal Subset ] error handling.
6664 */
6665 if ((RAW == '%') && (ctxt->external == 0) &&
6666 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006667 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006668 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006669 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006670 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006671 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6672 }
Owen Taylor3473f882001-02-23 17:55:21 +00006673 return(-1);
6674 }
6675
6676 SKIP_BLANKS;
6677 /*
6678 * Pop-up of finished entities.
6679 */
6680 while ((RAW == 0) && (ctxt->inputNr > 1))
6681 xmlPopInput(ctxt);
6682 SKIP_BLANKS;
6683
6684 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006685 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006686 if (content != NULL) {
6687 xmlFreeDocElementContent(ctxt->myDoc, content);
6688 }
Owen Taylor3473f882001-02-23 17:55:21 +00006689 } else {
6690 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006691 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6692 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006693 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006694
Owen Taylor3473f882001-02-23 17:55:21 +00006695 NEXT;
6696 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006697 (ctxt->sax->elementDecl != NULL)) {
6698 if (content != NULL)
6699 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006700 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6701 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006702 if ((content != NULL) && (content->parent == NULL)) {
6703 /*
6704 * this is a trick: if xmlAddElementDecl is called,
6705 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006706 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006707 * interfaces or change the API/ABI
6708 */
6709 xmlFreeDocElementContent(ctxt->myDoc, content);
6710 }
6711 } else if (content != NULL) {
6712 xmlFreeDocElementContent(ctxt->myDoc, content);
6713 }
Owen Taylor3473f882001-02-23 17:55:21 +00006714 }
Owen Taylor3473f882001-02-23 17:55:21 +00006715 }
6716 return(ret);
6717}
6718
6719/**
Owen Taylor3473f882001-02-23 17:55:21 +00006720 * xmlParseConditionalSections
6721 * @ctxt: an XML parser context
6722 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006723 * [61] conditionalSect ::= includeSect | ignoreSect
6724 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006725 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6726 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6727 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6728 */
6729
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006730static void
Owen Taylor3473f882001-02-23 17:55:21 +00006731xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006732 int id = ctxt->input->id;
6733
Owen Taylor3473f882001-02-23 17:55:21 +00006734 SKIP(3);
6735 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006736 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006737 SKIP(7);
6738 SKIP_BLANKS;
6739 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006740 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006741 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006742 if (ctxt->input->id != id) {
6743 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6744 "All markup of the conditional section is not in the same entity\n",
6745 NULL, NULL);
6746 }
Owen Taylor3473f882001-02-23 17:55:21 +00006747 NEXT;
6748 }
6749 if (xmlParserDebugEntities) {
6750 if ((ctxt->input != NULL) && (ctxt->input->filename))
6751 xmlGenericError(xmlGenericErrorContext,
6752 "%s(%d): ", ctxt->input->filename,
6753 ctxt->input->line);
6754 xmlGenericError(xmlGenericErrorContext,
6755 "Entering INCLUDE Conditional Section\n");
6756 }
6757
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006758 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6759 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006760 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006761 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006762
6763 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6764 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006765 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006766 NEXT;
6767 } else if (RAW == '%') {
6768 xmlParsePEReference(ctxt);
6769 } else
6770 xmlParseMarkupDecl(ctxt);
6771
6772 /*
6773 * Pop-up of finished entities.
6774 */
6775 while ((RAW == 0) && (ctxt->inputNr > 1))
6776 xmlPopInput(ctxt);
6777
Daniel Veillardfdc91562002-07-01 21:52:03 +00006778 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006779 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006780 break;
6781 }
6782 }
6783 if (xmlParserDebugEntities) {
6784 if ((ctxt->input != NULL) && (ctxt->input->filename))
6785 xmlGenericError(xmlGenericErrorContext,
6786 "%s(%d): ", ctxt->input->filename,
6787 ctxt->input->line);
6788 xmlGenericError(xmlGenericErrorContext,
6789 "Leaving INCLUDE Conditional Section\n");
6790 }
6791
Daniel Veillarda07050d2003-10-19 14:46:32 +00006792 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006793 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006794 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006795 int depth = 0;
6796
6797 SKIP(6);
6798 SKIP_BLANKS;
6799 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006800 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006801 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006802 if (ctxt->input->id != id) {
6803 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6804 "All markup of the conditional section is not in the same entity\n",
6805 NULL, NULL);
6806 }
Owen Taylor3473f882001-02-23 17:55:21 +00006807 NEXT;
6808 }
6809 if (xmlParserDebugEntities) {
6810 if ((ctxt->input != NULL) && (ctxt->input->filename))
6811 xmlGenericError(xmlGenericErrorContext,
6812 "%s(%d): ", ctxt->input->filename,
6813 ctxt->input->line);
6814 xmlGenericError(xmlGenericErrorContext,
6815 "Entering IGNORE Conditional Section\n");
6816 }
6817
6818 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006819 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006820 * But disable SAX event generating DTD building in the meantime
6821 */
6822 state = ctxt->disableSAX;
6823 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006824 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006825 ctxt->instate = XML_PARSER_IGNORE;
6826
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006827 while (((depth >= 0) && (RAW != 0)) &&
6828 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006829 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6830 depth++;
6831 SKIP(3);
6832 continue;
6833 }
6834 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6835 if (--depth >= 0) SKIP(3);
6836 continue;
6837 }
6838 NEXT;
6839 continue;
6840 }
6841
6842 ctxt->disableSAX = state;
6843 ctxt->instate = instate;
6844
6845 if (xmlParserDebugEntities) {
6846 if ((ctxt->input != NULL) && (ctxt->input->filename))
6847 xmlGenericError(xmlGenericErrorContext,
6848 "%s(%d): ", ctxt->input->filename,
6849 ctxt->input->line);
6850 xmlGenericError(xmlGenericErrorContext,
6851 "Leaving IGNORE Conditional Section\n");
6852 }
6853
6854 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006855 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006856 }
6857
6858 if (RAW == 0)
6859 SHRINK;
6860
6861 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006862 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006863 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006864 if (ctxt->input->id != id) {
6865 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6866 "All markup of the conditional section is not in the same entity\n",
6867 NULL, NULL);
6868 }
Owen Taylor3473f882001-02-23 17:55:21 +00006869 SKIP(3);
6870 }
6871}
6872
6873/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006874 * xmlParseMarkupDecl:
6875 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006876 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006877 * parse Markup declarations
6878 *
6879 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6880 * NotationDecl | PI | Comment
6881 *
6882 * [ VC: Proper Declaration/PE Nesting ]
6883 * Parameter-entity replacement text must be properly nested with
6884 * markup declarations. That is to say, if either the first character
6885 * or the last character of a markup declaration (markupdecl above) is
6886 * contained in the replacement text for a parameter-entity reference,
6887 * both must be contained in the same replacement text.
6888 *
6889 * [ WFC: PEs in Internal Subset ]
6890 * In the internal DTD subset, parameter-entity references can occur
6891 * only where markup declarations can occur, not within markup declarations.
6892 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006893 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006894 */
6895void
6896xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6897 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006898 if (CUR == '<') {
6899 if (NXT(1) == '!') {
6900 switch (NXT(2)) {
6901 case 'E':
6902 if (NXT(3) == 'L')
6903 xmlParseElementDecl(ctxt);
6904 else if (NXT(3) == 'N')
6905 xmlParseEntityDecl(ctxt);
6906 break;
6907 case 'A':
6908 xmlParseAttributeListDecl(ctxt);
6909 break;
6910 case 'N':
6911 xmlParseNotationDecl(ctxt);
6912 break;
6913 case '-':
6914 xmlParseComment(ctxt);
6915 break;
6916 default:
6917 /* there is an error but it will be detected later */
6918 break;
6919 }
6920 } else if (NXT(1) == '?') {
6921 xmlParsePI(ctxt);
6922 }
6923 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006924 /*
6925 * This is only for internal subset. On external entities,
6926 * the replacement is done before parsing stage
6927 */
6928 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6929 xmlParsePEReference(ctxt);
6930
6931 /*
6932 * Conditional sections are allowed from entities included
6933 * by PE References in the internal subset.
6934 */
6935 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6936 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6937 xmlParseConditionalSections(ctxt);
6938 }
6939 }
6940
6941 ctxt->instate = XML_PARSER_DTD;
6942}
6943
6944/**
6945 * xmlParseTextDecl:
6946 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006947 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006948 * parse an XML declaration header for external entities
6949 *
6950 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006951 */
6952
6953void
6954xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6955 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006956 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006957
6958 /*
6959 * We know that '<?xml' is here.
6960 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006961 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006962 SKIP(5);
6963 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006964 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006965 return;
6966 }
6967
William M. Brack76e95df2003-10-18 16:20:14 +00006968 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006969 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6970 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006971 }
6972 SKIP_BLANKS;
6973
6974 /*
6975 * We may have the VersionInfo here.
6976 */
6977 version = xmlParseVersionInfo(ctxt);
6978 if (version == NULL)
6979 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006980 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006981 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006982 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6983 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006984 }
6985 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006986 ctxt->input->version = version;
6987
6988 /*
6989 * We must have the encoding declaration
6990 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006991 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006992 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6993 /*
6994 * The XML REC instructs us to stop parsing right here
6995 */
6996 return;
6997 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006998 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6999 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7000 "Missing encoding in text declaration\n");
7001 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007002
7003 SKIP_BLANKS;
7004 if ((RAW == '?') && (NXT(1) == '>')) {
7005 SKIP(2);
7006 } else if (RAW == '>') {
7007 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007008 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007009 NEXT;
7010 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007011 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007012 MOVETO_ENDTAG(CUR_PTR);
7013 NEXT;
7014 }
7015}
7016
7017/**
Owen Taylor3473f882001-02-23 17:55:21 +00007018 * xmlParseExternalSubset:
7019 * @ctxt: an XML parser context
7020 * @ExternalID: the external identifier
7021 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007022 *
Owen Taylor3473f882001-02-23 17:55:21 +00007023 * parse Markup declarations from an external subset
7024 *
7025 * [30] extSubset ::= textDecl? extSubsetDecl
7026 *
7027 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7028 */
7029void
7030xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7031 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007032 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007033 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007034
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007035 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007036 (ctxt->input->end - ctxt->input->cur >= 4)) {
7037 xmlChar start[4];
7038 xmlCharEncoding enc;
7039
7040 start[0] = RAW;
7041 start[1] = NXT(1);
7042 start[2] = NXT(2);
7043 start[3] = NXT(3);
7044 enc = xmlDetectCharEncoding(start, 4);
7045 if (enc != XML_CHAR_ENCODING_NONE)
7046 xmlSwitchEncoding(ctxt, enc);
7047 }
7048
Daniel Veillarda07050d2003-10-19 14:46:32 +00007049 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007050 xmlParseTextDecl(ctxt);
7051 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7052 /*
7053 * The XML REC instructs us to stop parsing right here
7054 */
7055 ctxt->instate = XML_PARSER_EOF;
7056 return;
7057 }
7058 }
7059 if (ctxt->myDoc == NULL) {
7060 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007061 if (ctxt->myDoc == NULL) {
7062 xmlErrMemory(ctxt, "New Doc failed");
7063 return;
7064 }
7065 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007066 }
7067 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7068 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7069
7070 ctxt->instate = XML_PARSER_DTD;
7071 ctxt->external = 1;
7072 while (((RAW == '<') && (NXT(1) == '?')) ||
7073 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007074 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007075 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007076 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007077
7078 GROW;
7079 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7080 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007081 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007082 NEXT;
7083 } else if (RAW == '%') {
7084 xmlParsePEReference(ctxt);
7085 } else
7086 xmlParseMarkupDecl(ctxt);
7087
7088 /*
7089 * Pop-up of finished entities.
7090 */
7091 while ((RAW == 0) && (ctxt->inputNr > 1))
7092 xmlPopInput(ctxt);
7093
Daniel Veillardfdc91562002-07-01 21:52:03 +00007094 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007095 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007096 break;
7097 }
7098 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007099
Owen Taylor3473f882001-02-23 17:55:21 +00007100 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007101 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007102 }
7103
7104}
7105
7106/**
7107 * xmlParseReference:
7108 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007109 *
Owen Taylor3473f882001-02-23 17:55:21 +00007110 * parse and handle entity references in content, depending on the SAX
7111 * interface, this may end-up in a call to character() if this is a
7112 * CharRef, a predefined entity, if there is no reference() callback.
7113 * or if the parser was asked to switch to that mode.
7114 *
7115 * [67] Reference ::= EntityRef | CharRef
7116 */
7117void
7118xmlParseReference(xmlParserCtxtPtr ctxt) {
7119 xmlEntityPtr ent;
7120 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007121 int was_checked;
7122 xmlNodePtr list = NULL;
7123 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007124
Daniel Veillard0161e632008-08-28 15:36:32 +00007125
7126 if (RAW != '&')
7127 return;
7128
7129 /*
7130 * Simple case of a CharRef
7131 */
Owen Taylor3473f882001-02-23 17:55:21 +00007132 if (NXT(1) == '#') {
7133 int i = 0;
7134 xmlChar out[10];
7135 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007136 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007137
Daniel Veillarddc171602008-03-26 17:41:38 +00007138 if (value == 0)
7139 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007140 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7141 /*
7142 * So we are using non-UTF-8 buffers
7143 * Check that the char fit on 8bits, if not
7144 * generate a CharRef.
7145 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007146 if (value <= 0xFF) {
7147 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007148 out[1] = 0;
7149 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7150 (!ctxt->disableSAX))
7151 ctxt->sax->characters(ctxt->userData, out, 1);
7152 } else {
7153 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007154 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007155 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007156 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007157 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7158 (!ctxt->disableSAX))
7159 ctxt->sax->reference(ctxt->userData, out);
7160 }
7161 } else {
7162 /*
7163 * Just encode the value in UTF-8
7164 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007165 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007166 out[i] = 0;
7167 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7168 (!ctxt->disableSAX))
7169 ctxt->sax->characters(ctxt->userData, out, i);
7170 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007171 return;
7172 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007173
Daniel Veillard0161e632008-08-28 15:36:32 +00007174 /*
7175 * We are seeing an entity reference
7176 */
7177 ent = xmlParseEntityRef(ctxt);
7178 if (ent == NULL) return;
7179 if (!ctxt->wellFormed)
7180 return;
7181 was_checked = ent->checked;
7182
7183 /* special case of predefined entities */
7184 if ((ent->name == NULL) ||
7185 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7186 val = ent->content;
7187 if (val == NULL) return;
7188 /*
7189 * inline the entity.
7190 */
7191 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7192 (!ctxt->disableSAX))
7193 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7194 return;
7195 }
7196
7197 /*
7198 * The first reference to the entity trigger a parsing phase
7199 * where the ent->children is filled with the result from
7200 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007201 * Note: external parsed entities will not be loaded, it is not
7202 * required for a non-validating parser, unless the parsing option
7203 * of validating, or substituting entities were given. Doing so is
7204 * far more secure as the parser will only process data coming from
7205 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007206 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007207 if ((ent->checked == 0) &&
7208 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7209 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007210 unsigned long oldnbent = ctxt->nbentities;
7211
7212 /*
7213 * This is a bit hackish but this seems the best
7214 * way to make sure both SAX and DOM entity support
7215 * behaves okay.
7216 */
7217 void *user_data;
7218 if (ctxt->userData == ctxt)
7219 user_data = NULL;
7220 else
7221 user_data = ctxt->userData;
7222
7223 /*
7224 * Check that this entity is well formed
7225 * 4.3.2: An internal general parsed entity is well-formed
7226 * if its replacement text matches the production labeled
7227 * content.
7228 */
7229 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7230 ctxt->depth++;
7231 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7232 user_data, &list);
7233 ctxt->depth--;
7234
7235 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7236 ctxt->depth++;
7237 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7238 user_data, ctxt->depth, ent->URI,
7239 ent->ExternalID, &list);
7240 ctxt->depth--;
7241 } else {
7242 ret = XML_ERR_ENTITY_PE_INTERNAL;
7243 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7244 "invalid entity type found\n", NULL);
7245 }
7246
7247 /*
7248 * Store the number of entities needing parsing for this entity
7249 * content and do checkings
7250 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007251 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7252 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7253 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007254 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007255 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007256 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007257 return;
7258 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007259 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007260 xmlFreeNodeList(list);
7261 return;
7262 }
Owen Taylor3473f882001-02-23 17:55:21 +00007263
Daniel Veillard0161e632008-08-28 15:36:32 +00007264 if ((ret == XML_ERR_OK) && (list != NULL)) {
7265 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7266 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7267 (ent->children == NULL)) {
7268 ent->children = list;
7269 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007270 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007271 * Prune it directly in the generated document
7272 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007273 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007274 if (((list->type == XML_TEXT_NODE) &&
7275 (list->next == NULL)) ||
7276 (ctxt->parseMode == XML_PARSE_READER)) {
7277 list->parent = (xmlNodePtr) ent;
7278 list = NULL;
7279 ent->owner = 1;
7280 } else {
7281 ent->owner = 0;
7282 while (list != NULL) {
7283 list->parent = (xmlNodePtr) ctxt->node;
7284 list->doc = ctxt->myDoc;
7285 if (list->next == NULL)
7286 ent->last = list;
7287 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007288 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007289 list = ent->children;
7290#ifdef LIBXML_LEGACY_ENABLED
7291 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7292 xmlAddEntityReference(ent, list, NULL);
7293#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007294 }
7295 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007296 ent->owner = 1;
7297 while (list != NULL) {
7298 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007299 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007300 if (list->next == NULL)
7301 ent->last = list;
7302 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007303 }
7304 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007305 } else {
7306 xmlFreeNodeList(list);
7307 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007308 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007309 } else if ((ret != XML_ERR_OK) &&
7310 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7311 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7312 "Entity '%s' failed to parse\n", ent->name);
7313 } else if (list != NULL) {
7314 xmlFreeNodeList(list);
7315 list = NULL;
7316 }
7317 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007318 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007319 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007320 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007321 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007322
Daniel Veillard0161e632008-08-28 15:36:32 +00007323 /*
7324 * Now that the entity content has been gathered
7325 * provide it to the application, this can take different forms based
7326 * on the parsing modes.
7327 */
7328 if (ent->children == NULL) {
7329 /*
7330 * Probably running in SAX mode and the callbacks don't
7331 * build the entity content. So unless we already went
7332 * though parsing for first checking go though the entity
7333 * content to generate callbacks associated to the entity
7334 */
7335 if (was_checked != 0) {
7336 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007337 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007338 * This is a bit hackish but this seems the best
7339 * way to make sure both SAX and DOM entity support
7340 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007341 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007342 if (ctxt->userData == ctxt)
7343 user_data = NULL;
7344 else
7345 user_data = ctxt->userData;
7346
7347 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7348 ctxt->depth++;
7349 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7350 ent->content, user_data, NULL);
7351 ctxt->depth--;
7352 } else if (ent->etype ==
7353 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7354 ctxt->depth++;
7355 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7356 ctxt->sax, user_data, ctxt->depth,
7357 ent->URI, ent->ExternalID, NULL);
7358 ctxt->depth--;
7359 } else {
7360 ret = XML_ERR_ENTITY_PE_INTERNAL;
7361 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7362 "invalid entity type found\n", NULL);
7363 }
7364 if (ret == XML_ERR_ENTITY_LOOP) {
7365 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7366 return;
7367 }
7368 }
7369 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7370 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7371 /*
7372 * Entity reference callback comes second, it's somewhat
7373 * superfluous but a compatibility to historical behaviour
7374 */
7375 ctxt->sax->reference(ctxt->userData, ent->name);
7376 }
7377 return;
7378 }
7379
7380 /*
7381 * If we didn't get any children for the entity being built
7382 */
7383 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7384 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7385 /*
7386 * Create a node.
7387 */
7388 ctxt->sax->reference(ctxt->userData, ent->name);
7389 return;
7390 }
7391
7392 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7393 /*
7394 * There is a problem on the handling of _private for entities
7395 * (bug 155816): Should we copy the content of the field from
7396 * the entity (possibly overwriting some value set by the user
7397 * when a copy is created), should we leave it alone, or should
7398 * we try to take care of different situations? The problem
7399 * is exacerbated by the usage of this field by the xmlReader.
7400 * To fix this bug, we look at _private on the created node
7401 * and, if it's NULL, we copy in whatever was in the entity.
7402 * If it's not NULL we leave it alone. This is somewhat of a
7403 * hack - maybe we should have further tests to determine
7404 * what to do.
7405 */
7406 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7407 /*
7408 * Seems we are generating the DOM content, do
7409 * a simple tree copy for all references except the first
7410 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007411 */
7412 if (((list == NULL) && (ent->owner == 0)) ||
7413 (ctxt->parseMode == XML_PARSE_READER)) {
7414 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7415
7416 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007417 * We are copying here, make sure there is no abuse
7418 */
7419 ctxt->sizeentcopy += ent->length;
7420 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7421 return;
7422
7423 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007424 * when operating on a reader, the entities definitions
7425 * are always owning the entities subtree.
7426 if (ctxt->parseMode == XML_PARSE_READER)
7427 ent->owner = 1;
7428 */
7429
7430 cur = ent->children;
7431 while (cur != NULL) {
7432 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7433 if (nw != NULL) {
7434 if (nw->_private == NULL)
7435 nw->_private = cur->_private;
7436 if (firstChild == NULL){
7437 firstChild = nw;
7438 }
7439 nw = xmlAddChild(ctxt->node, nw);
7440 }
7441 if (cur == ent->last) {
7442 /*
7443 * needed to detect some strange empty
7444 * node cases in the reader tests
7445 */
7446 if ((ctxt->parseMode == XML_PARSE_READER) &&
7447 (nw != NULL) &&
7448 (nw->type == XML_ELEMENT_NODE) &&
7449 (nw->children == NULL))
7450 nw->extra = 1;
7451
7452 break;
7453 }
7454 cur = cur->next;
7455 }
7456#ifdef LIBXML_LEGACY_ENABLED
7457 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7458 xmlAddEntityReference(ent, firstChild, nw);
7459#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007460 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007461 xmlNodePtr nw = NULL, cur, next, last,
7462 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007463
7464 /*
7465 * We are copying here, make sure there is no abuse
7466 */
7467 ctxt->sizeentcopy += ent->length;
7468 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7469 return;
7470
Daniel Veillard0161e632008-08-28 15:36:32 +00007471 /*
7472 * Copy the entity child list and make it the new
7473 * entity child list. The goal is to make sure any
7474 * ID or REF referenced will be the one from the
7475 * document content and not the entity copy.
7476 */
7477 cur = ent->children;
7478 ent->children = NULL;
7479 last = ent->last;
7480 ent->last = NULL;
7481 while (cur != NULL) {
7482 next = cur->next;
7483 cur->next = NULL;
7484 cur->parent = NULL;
7485 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7486 if (nw != NULL) {
7487 if (nw->_private == NULL)
7488 nw->_private = cur->_private;
7489 if (firstChild == NULL){
7490 firstChild = cur;
7491 }
7492 xmlAddChild((xmlNodePtr) ent, nw);
7493 xmlAddChild(ctxt->node, cur);
7494 }
7495 if (cur == last)
7496 break;
7497 cur = next;
7498 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007499 if (ent->owner == 0)
7500 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007501#ifdef LIBXML_LEGACY_ENABLED
7502 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7503 xmlAddEntityReference(ent, firstChild, nw);
7504#endif /* LIBXML_LEGACY_ENABLED */
7505 } else {
7506 const xmlChar *nbktext;
7507
7508 /*
7509 * the name change is to avoid coalescing of the
7510 * node with a possible previous text one which
7511 * would make ent->children a dangling pointer
7512 */
7513 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7514 -1);
7515 if (ent->children->type == XML_TEXT_NODE)
7516 ent->children->name = nbktext;
7517 if ((ent->last != ent->children) &&
7518 (ent->last->type == XML_TEXT_NODE))
7519 ent->last->name = nbktext;
7520 xmlAddChildList(ctxt->node, ent->children);
7521 }
7522
7523 /*
7524 * This is to avoid a nasty side effect, see
7525 * characters() in SAX.c
7526 */
7527 ctxt->nodemem = 0;
7528 ctxt->nodelen = 0;
7529 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007530 }
7531 }
7532}
7533
7534/**
7535 * xmlParseEntityRef:
7536 * @ctxt: an XML parser context
7537 *
7538 * parse ENTITY references declarations
7539 *
7540 * [68] EntityRef ::= '&' Name ';'
7541 *
7542 * [ WFC: Entity Declared ]
7543 * In a document without any DTD, a document with only an internal DTD
7544 * subset which contains no parameter entity references, or a document
7545 * with "standalone='yes'", the Name given in the entity reference
7546 * must match that in an entity declaration, except that well-formed
7547 * documents need not declare any of the following entities: amp, lt,
7548 * gt, apos, quot. The declaration of a parameter entity must precede
7549 * any reference to it. Similarly, the declaration of a general entity
7550 * must precede any reference to it which appears in a default value in an
7551 * attribute-list declaration. Note that if entities are declared in the
7552 * external subset or in external parameter entities, a non-validating
7553 * processor is not obligated to read and process their declarations;
7554 * for such documents, the rule that an entity must be declared is a
7555 * well-formedness constraint only if standalone='yes'.
7556 *
7557 * [ WFC: Parsed Entity ]
7558 * An entity reference must not contain the name of an unparsed entity
7559 *
7560 * Returns the xmlEntityPtr if found, or NULL otherwise.
7561 */
7562xmlEntityPtr
7563xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007564 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007565 xmlEntityPtr ent = NULL;
7566
7567 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007568 if (ctxt->instate == XML_PARSER_EOF)
7569 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007570
Daniel Veillard0161e632008-08-28 15:36:32 +00007571 if (RAW != '&')
7572 return(NULL);
7573 NEXT;
7574 name = xmlParseName(ctxt);
7575 if (name == NULL) {
7576 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7577 "xmlParseEntityRef: no name\n");
7578 return(NULL);
7579 }
7580 if (RAW != ';') {
7581 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7582 return(NULL);
7583 }
7584 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007585
Daniel Veillard0161e632008-08-28 15:36:32 +00007586 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007587 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007588 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007589 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7590 ent = xmlGetPredefinedEntity(name);
7591 if (ent != NULL)
7592 return(ent);
7593 }
Owen Taylor3473f882001-02-23 17:55:21 +00007594
Daniel Veillard0161e632008-08-28 15:36:32 +00007595 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007596 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007597 */
7598 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007599
Daniel Veillard0161e632008-08-28 15:36:32 +00007600 /*
7601 * Ask first SAX for entity resolution, otherwise try the
7602 * entities which may have stored in the parser context.
7603 */
7604 if (ctxt->sax != NULL) {
7605 if (ctxt->sax->getEntity != NULL)
7606 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007607 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007608 (ctxt->options & XML_PARSE_OLDSAX))
7609 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007610 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7611 (ctxt->userData==ctxt)) {
7612 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007613 }
7614 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007615 if (ctxt->instate == XML_PARSER_EOF)
7616 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007617 /*
7618 * [ WFC: Entity Declared ]
7619 * In a document without any DTD, a document with only an
7620 * internal DTD subset which contains no parameter entity
7621 * references, or a document with "standalone='yes'", the
7622 * Name given in the entity reference must match that in an
7623 * entity declaration, except that well-formed documents
7624 * need not declare any of the following entities: amp, lt,
7625 * gt, apos, quot.
7626 * The declaration of a parameter entity must precede any
7627 * reference to it.
7628 * Similarly, the declaration of a general entity must
7629 * precede any reference to it which appears in a default
7630 * value in an attribute-list declaration. Note that if
7631 * entities are declared in the external subset or in
7632 * external parameter entities, a non-validating processor
7633 * is not obligated to read and process their declarations;
7634 * for such documents, the rule that an entity must be
7635 * declared is a well-formedness constraint only if
7636 * standalone='yes'.
7637 */
7638 if (ent == NULL) {
7639 if ((ctxt->standalone == 1) ||
7640 ((ctxt->hasExternalSubset == 0) &&
7641 (ctxt->hasPErefs == 0))) {
7642 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7643 "Entity '%s' not defined\n", name);
7644 } else {
7645 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7646 "Entity '%s' not defined\n", name);
7647 if ((ctxt->inSubset == 0) &&
7648 (ctxt->sax != NULL) &&
7649 (ctxt->sax->reference != NULL)) {
7650 ctxt->sax->reference(ctxt->userData, name);
7651 }
7652 }
7653 ctxt->valid = 0;
7654 }
7655
7656 /*
7657 * [ WFC: Parsed Entity ]
7658 * An entity reference must not contain the name of an
7659 * unparsed entity
7660 */
7661 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7662 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7663 "Entity reference to unparsed entity %s\n", name);
7664 }
7665
7666 /*
7667 * [ WFC: No External Entity References ]
7668 * Attribute values cannot contain direct or indirect
7669 * entity references to external entities.
7670 */
7671 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7672 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7673 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7674 "Attribute references external entity '%s'\n", name);
7675 }
7676 /*
7677 * [ WFC: No < in Attribute Values ]
7678 * The replacement text of any entity referred to directly or
7679 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007680 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007681 */
7682 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007683 (ent != NULL) &&
7684 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7685 if ((ent->checked & 1) || ((ent->checked == 0) &&
7686 (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7687 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7688 "'<' in entity '%s' is not allowed in attributes values\n", name);
7689 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007690 }
7691
7692 /*
7693 * Internal check, no parameter entities here ...
7694 */
7695 else {
7696 switch (ent->etype) {
7697 case XML_INTERNAL_PARAMETER_ENTITY:
7698 case XML_EXTERNAL_PARAMETER_ENTITY:
7699 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7700 "Attempt to reference the parameter entity '%s'\n",
7701 name);
7702 break;
7703 default:
7704 break;
7705 }
7706 }
7707
7708 /*
7709 * [ WFC: No Recursion ]
7710 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007711 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007712 * Done somewhere else
7713 */
Owen Taylor3473f882001-02-23 17:55:21 +00007714 return(ent);
7715}
7716
7717/**
7718 * xmlParseStringEntityRef:
7719 * @ctxt: an XML parser context
7720 * @str: a pointer to an index in the string
7721 *
7722 * parse ENTITY references declarations, but this version parses it from
7723 * a string value.
7724 *
7725 * [68] EntityRef ::= '&' Name ';'
7726 *
7727 * [ WFC: Entity Declared ]
7728 * In a document without any DTD, a document with only an internal DTD
7729 * subset which contains no parameter entity references, or a document
7730 * with "standalone='yes'", the Name given in the entity reference
7731 * must match that in an entity declaration, except that well-formed
7732 * documents need not declare any of the following entities: amp, lt,
7733 * gt, apos, quot. The declaration of a parameter entity must precede
7734 * any reference to it. Similarly, the declaration of a general entity
7735 * must precede any reference to it which appears in a default value in an
7736 * attribute-list declaration. Note that if entities are declared in the
7737 * external subset or in external parameter entities, a non-validating
7738 * processor is not obligated to read and process their declarations;
7739 * for such documents, the rule that an entity must be declared is a
7740 * well-formedness constraint only if standalone='yes'.
7741 *
7742 * [ WFC: Parsed Entity ]
7743 * An entity reference must not contain the name of an unparsed entity
7744 *
7745 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7746 * is updated to the current location in the string.
7747 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007748static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007749xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7750 xmlChar *name;
7751 const xmlChar *ptr;
7752 xmlChar cur;
7753 xmlEntityPtr ent = NULL;
7754
7755 if ((str == NULL) || (*str == NULL))
7756 return(NULL);
7757 ptr = *str;
7758 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007759 if (cur != '&')
7760 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007761
Daniel Veillard0161e632008-08-28 15:36:32 +00007762 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007763 name = xmlParseStringName(ctxt, &ptr);
7764 if (name == NULL) {
7765 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7766 "xmlParseStringEntityRef: no name\n");
7767 *str = ptr;
7768 return(NULL);
7769 }
7770 if (*ptr != ';') {
7771 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007772 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007773 *str = ptr;
7774 return(NULL);
7775 }
7776 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007777
Owen Taylor3473f882001-02-23 17:55:21 +00007778
Daniel Veillard0161e632008-08-28 15:36:32 +00007779 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007780 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007781 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007782 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7783 ent = xmlGetPredefinedEntity(name);
7784 if (ent != NULL) {
7785 xmlFree(name);
7786 *str = ptr;
7787 return(ent);
7788 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007789 }
Owen Taylor3473f882001-02-23 17:55:21 +00007790
Daniel Veillard0161e632008-08-28 15:36:32 +00007791 /*
7792 * Increate the number of entity references parsed
7793 */
7794 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007795
Daniel Veillard0161e632008-08-28 15:36:32 +00007796 /*
7797 * Ask first SAX for entity resolution, otherwise try the
7798 * entities which may have stored in the parser context.
7799 */
7800 if (ctxt->sax != NULL) {
7801 if (ctxt->sax->getEntity != NULL)
7802 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007803 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7804 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007805 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7806 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007807 }
7808 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007809 if (ctxt->instate == XML_PARSER_EOF) {
7810 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007811 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007812 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007813
7814 /*
7815 * [ WFC: Entity Declared ]
7816 * In a document without any DTD, a document with only an
7817 * internal DTD subset which contains no parameter entity
7818 * references, or a document with "standalone='yes'", the
7819 * Name given in the entity reference must match that in an
7820 * entity declaration, except that well-formed documents
7821 * need not declare any of the following entities: amp, lt,
7822 * gt, apos, quot.
7823 * The declaration of a parameter entity must precede any
7824 * reference to it.
7825 * Similarly, the declaration of a general entity must
7826 * precede any reference to it which appears in a default
7827 * value in an attribute-list declaration. Note that if
7828 * entities are declared in the external subset or in
7829 * external parameter entities, a non-validating processor
7830 * is not obligated to read and process their declarations;
7831 * for such documents, the rule that an entity must be
7832 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007833 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007834 */
7835 if (ent == NULL) {
7836 if ((ctxt->standalone == 1) ||
7837 ((ctxt->hasExternalSubset == 0) &&
7838 (ctxt->hasPErefs == 0))) {
7839 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7840 "Entity '%s' not defined\n", name);
7841 } else {
7842 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7843 "Entity '%s' not defined\n",
7844 name);
7845 }
7846 /* TODO ? check regressions ctxt->valid = 0; */
7847 }
7848
7849 /*
7850 * [ WFC: Parsed Entity ]
7851 * An entity reference must not contain the name of an
7852 * unparsed entity
7853 */
7854 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7855 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7856 "Entity reference to unparsed entity %s\n", name);
7857 }
7858
7859 /*
7860 * [ WFC: No External Entity References ]
7861 * Attribute values cannot contain direct or indirect
7862 * entity references to external entities.
7863 */
7864 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7865 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7866 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7867 "Attribute references external entity '%s'\n", name);
7868 }
7869 /*
7870 * [ WFC: No < in Attribute Values ]
7871 * The replacement text of any entity referred to directly or
7872 * indirectly in an attribute value (other than "&lt;") must
7873 * not contain a <.
7874 */
7875 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7876 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007877 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007878 (xmlStrchr(ent->content, '<'))) {
7879 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7880 "'<' in entity '%s' is not allowed in attributes values\n",
7881 name);
7882 }
7883
7884 /*
7885 * Internal check, no parameter entities here ...
7886 */
7887 else {
7888 switch (ent->etype) {
7889 case XML_INTERNAL_PARAMETER_ENTITY:
7890 case XML_EXTERNAL_PARAMETER_ENTITY:
7891 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7892 "Attempt to reference the parameter entity '%s'\n",
7893 name);
7894 break;
7895 default:
7896 break;
7897 }
7898 }
7899
7900 /*
7901 * [ WFC: No Recursion ]
7902 * A parsed entity must not contain a recursive reference
7903 * to itself, either directly or indirectly.
7904 * Done somewhere else
7905 */
7906
7907 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007908 *str = ptr;
7909 return(ent);
7910}
7911
7912/**
7913 * xmlParsePEReference:
7914 * @ctxt: an XML parser context
7915 *
7916 * parse PEReference declarations
7917 * The entity content is handled directly by pushing it's content as
7918 * a new input stream.
7919 *
7920 * [69] PEReference ::= '%' Name ';'
7921 *
7922 * [ WFC: No Recursion ]
7923 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007924 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007925 *
7926 * [ WFC: Entity Declared ]
7927 * In a document without any DTD, a document with only an internal DTD
7928 * subset which contains no parameter entity references, or a document
7929 * with "standalone='yes'", ... ... The declaration of a parameter
7930 * entity must precede any reference to it...
7931 *
7932 * [ VC: Entity Declared ]
7933 * In a document with an external subset or external parameter entities
7934 * with "standalone='no'", ... ... The declaration of a parameter entity
7935 * must precede any reference to it...
7936 *
7937 * [ WFC: In DTD ]
7938 * Parameter-entity references may only appear in the DTD.
7939 * NOTE: misleading but this is handled.
7940 */
7941void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007942xmlParsePEReference(xmlParserCtxtPtr ctxt)
7943{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007944 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007945 xmlEntityPtr entity = NULL;
7946 xmlParserInputPtr input;
7947
Daniel Veillard0161e632008-08-28 15:36:32 +00007948 if (RAW != '%')
7949 return;
7950 NEXT;
7951 name = xmlParseName(ctxt);
7952 if (name == NULL) {
7953 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7954 "xmlParsePEReference: no name\n");
7955 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007956 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007957 if (RAW != ';') {
7958 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7959 return;
7960 }
7961
7962 NEXT;
7963
7964 /*
7965 * Increate the number of entity references parsed
7966 */
7967 ctxt->nbentities++;
7968
7969 /*
7970 * Request the entity from SAX
7971 */
7972 if ((ctxt->sax != NULL) &&
7973 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007974 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7975 if (ctxt->instate == XML_PARSER_EOF)
7976 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007977 if (entity == NULL) {
7978 /*
7979 * [ WFC: Entity Declared ]
7980 * In a document without any DTD, a document with only an
7981 * internal DTD subset which contains no parameter entity
7982 * references, or a document with "standalone='yes'", ...
7983 * ... The declaration of a parameter entity must precede
7984 * any reference to it...
7985 */
7986 if ((ctxt->standalone == 1) ||
7987 ((ctxt->hasExternalSubset == 0) &&
7988 (ctxt->hasPErefs == 0))) {
7989 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7990 "PEReference: %%%s; not found\n",
7991 name);
7992 } else {
7993 /*
7994 * [ VC: Entity Declared ]
7995 * In a document with an external subset or external
7996 * parameter entities with "standalone='no'", ...
7997 * ... The declaration of a parameter entity must
7998 * precede any reference to it...
7999 */
8000 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8001 "PEReference: %%%s; not found\n",
8002 name, NULL);
8003 ctxt->valid = 0;
8004 }
8005 } else {
8006 /*
8007 * Internal checking in case the entity quest barfed
8008 */
8009 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8010 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8011 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8012 "Internal: %%%s; is not a parameter entity\n",
8013 name, NULL);
8014 } else if (ctxt->input->free != deallocblankswrapper) {
8015 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8016 if (xmlPushInput(ctxt, input) < 0)
8017 return;
8018 } else {
8019 /*
8020 * TODO !!!
8021 * handle the extra spaces added before and after
8022 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8023 */
8024 input = xmlNewEntityInputStream(ctxt, entity);
8025 if (xmlPushInput(ctxt, input) < 0)
8026 return;
8027 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8028 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8029 (IS_BLANK_CH(NXT(5)))) {
8030 xmlParseTextDecl(ctxt);
8031 if (ctxt->errNo ==
8032 XML_ERR_UNSUPPORTED_ENCODING) {
8033 /*
8034 * The XML REC instructs us to stop parsing
8035 * right here
8036 */
8037 ctxt->instate = XML_PARSER_EOF;
8038 return;
8039 }
8040 }
8041 }
8042 }
8043 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008044}
8045
8046/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008047 * xmlLoadEntityContent:
8048 * @ctxt: an XML parser context
8049 * @entity: an unloaded system entity
8050 *
8051 * Load the original content of the given system entity from the
8052 * ExternalID/SystemID given. This is to be used for Included in Literal
8053 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8054 *
8055 * Returns 0 in case of success and -1 in case of failure
8056 */
8057static int
8058xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8059 xmlParserInputPtr input;
8060 xmlBufferPtr buf;
8061 int l, c;
8062 int count = 0;
8063
8064 if ((ctxt == NULL) || (entity == NULL) ||
8065 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8066 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8067 (entity->content != NULL)) {
8068 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8069 "xmlLoadEntityContent parameter error");
8070 return(-1);
8071 }
8072
8073 if (xmlParserDebugEntities)
8074 xmlGenericError(xmlGenericErrorContext,
8075 "Reading %s entity content input\n", entity->name);
8076
8077 buf = xmlBufferCreate();
8078 if (buf == NULL) {
8079 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8080 "xmlLoadEntityContent parameter error");
8081 return(-1);
8082 }
8083
8084 input = xmlNewEntityInputStream(ctxt, entity);
8085 if (input == NULL) {
8086 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8087 "xmlLoadEntityContent input error");
8088 xmlBufferFree(buf);
8089 return(-1);
8090 }
8091
8092 /*
8093 * Push the entity as the current input, read char by char
8094 * saving to the buffer until the end of the entity or an error
8095 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008096 if (xmlPushInput(ctxt, input) < 0) {
8097 xmlBufferFree(buf);
8098 return(-1);
8099 }
8100
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008101 GROW;
8102 c = CUR_CHAR(l);
8103 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8104 (IS_CHAR(c))) {
8105 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008106 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008107 count = 0;
8108 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008109 if (ctxt->instate == XML_PARSER_EOF) {
8110 xmlBufferFree(buf);
8111 return(-1);
8112 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008113 }
8114 NEXTL(l);
8115 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008116 if (c == 0) {
8117 count = 0;
8118 GROW;
8119 if (ctxt->instate == XML_PARSER_EOF) {
8120 xmlBufferFree(buf);
8121 return(-1);
8122 }
8123 c = CUR_CHAR(l);
8124 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008125 }
8126
8127 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8128 xmlPopInput(ctxt);
8129 } else if (!IS_CHAR(c)) {
8130 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8131 "xmlLoadEntityContent: invalid char value %d\n",
8132 c);
8133 xmlBufferFree(buf);
8134 return(-1);
8135 }
8136 entity->content = buf->content;
8137 buf->content = NULL;
8138 xmlBufferFree(buf);
8139
8140 return(0);
8141}
8142
8143/**
Owen Taylor3473f882001-02-23 17:55:21 +00008144 * xmlParseStringPEReference:
8145 * @ctxt: an XML parser context
8146 * @str: a pointer to an index in the string
8147 *
8148 * parse PEReference declarations
8149 *
8150 * [69] PEReference ::= '%' Name ';'
8151 *
8152 * [ WFC: No Recursion ]
8153 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008154 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008155 *
8156 * [ WFC: Entity Declared ]
8157 * In a document without any DTD, a document with only an internal DTD
8158 * subset which contains no parameter entity references, or a document
8159 * with "standalone='yes'", ... ... The declaration of a parameter
8160 * entity must precede any reference to it...
8161 *
8162 * [ VC: Entity Declared ]
8163 * In a document with an external subset or external parameter entities
8164 * with "standalone='no'", ... ... The declaration of a parameter entity
8165 * must precede any reference to it...
8166 *
8167 * [ WFC: In DTD ]
8168 * Parameter-entity references may only appear in the DTD.
8169 * NOTE: misleading but this is handled.
8170 *
8171 * Returns the string of the entity content.
8172 * str is updated to the current value of the index
8173 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008174static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008175xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8176 const xmlChar *ptr;
8177 xmlChar cur;
8178 xmlChar *name;
8179 xmlEntityPtr entity = NULL;
8180
8181 if ((str == NULL) || (*str == NULL)) return(NULL);
8182 ptr = *str;
8183 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008184 if (cur != '%')
8185 return(NULL);
8186 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008187 name = xmlParseStringName(ctxt, &ptr);
8188 if (name == NULL) {
8189 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8190 "xmlParseStringPEReference: no name\n");
8191 *str = ptr;
8192 return(NULL);
8193 }
8194 cur = *ptr;
8195 if (cur != ';') {
8196 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8197 xmlFree(name);
8198 *str = ptr;
8199 return(NULL);
8200 }
8201 ptr++;
8202
8203 /*
8204 * Increate the number of entity references parsed
8205 */
8206 ctxt->nbentities++;
8207
8208 /*
8209 * Request the entity from SAX
8210 */
8211 if ((ctxt->sax != NULL) &&
8212 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008213 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8214 if (ctxt->instate == XML_PARSER_EOF) {
8215 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008216 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008217 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008218 if (entity == NULL) {
8219 /*
8220 * [ WFC: Entity Declared ]
8221 * In a document without any DTD, a document with only an
8222 * internal DTD subset which contains no parameter entity
8223 * references, or a document with "standalone='yes'", ...
8224 * ... The declaration of a parameter entity must precede
8225 * any reference to it...
8226 */
8227 if ((ctxt->standalone == 1) ||
8228 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8229 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8230 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008231 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008232 /*
8233 * [ VC: Entity Declared ]
8234 * In a document with an external subset or external
8235 * parameter entities with "standalone='no'", ...
8236 * ... The declaration of a parameter entity must
8237 * precede any reference to it...
8238 */
8239 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8240 "PEReference: %%%s; not found\n",
8241 name, NULL);
8242 ctxt->valid = 0;
8243 }
8244 } else {
8245 /*
8246 * Internal checking in case the entity quest barfed
8247 */
8248 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8249 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8250 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8251 "%%%s; is not a parameter entity\n",
8252 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008253 }
8254 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008255 ctxt->hasPErefs = 1;
8256 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008257 *str = ptr;
8258 return(entity);
8259}
8260
8261/**
8262 * xmlParseDocTypeDecl:
8263 * @ctxt: an XML parser context
8264 *
8265 * parse a DOCTYPE declaration
8266 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008267 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008268 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8269 *
8270 * [ VC: Root Element Type ]
8271 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008272 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008273 */
8274
8275void
8276xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008277 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008278 xmlChar *ExternalID = NULL;
8279 xmlChar *URI = NULL;
8280
8281 /*
8282 * We know that '<!DOCTYPE' has been detected.
8283 */
8284 SKIP(9);
8285
8286 SKIP_BLANKS;
8287
8288 /*
8289 * Parse the DOCTYPE name.
8290 */
8291 name = xmlParseName(ctxt);
8292 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008293 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8294 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008295 }
8296 ctxt->intSubName = name;
8297
8298 SKIP_BLANKS;
8299
8300 /*
8301 * Check for SystemID and ExternalID
8302 */
8303 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8304
8305 if ((URI != NULL) || (ExternalID != NULL)) {
8306 ctxt->hasExternalSubset = 1;
8307 }
8308 ctxt->extSubURI = URI;
8309 ctxt->extSubSystem = ExternalID;
8310
8311 SKIP_BLANKS;
8312
8313 /*
8314 * Create and update the internal subset.
8315 */
8316 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8317 (!ctxt->disableSAX))
8318 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008319 if (ctxt->instate == XML_PARSER_EOF)
8320 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008321
8322 /*
8323 * Is there any internal subset declarations ?
8324 * they are handled separately in xmlParseInternalSubset()
8325 */
8326 if (RAW == '[')
8327 return;
8328
8329 /*
8330 * We should be at the end of the DOCTYPE declaration.
8331 */
8332 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008333 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008334 }
8335 NEXT;
8336}
8337
8338/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008339 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008340 * @ctxt: an XML parser context
8341 *
8342 * parse the internal subset declaration
8343 *
8344 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8345 */
8346
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008347static void
Owen Taylor3473f882001-02-23 17:55:21 +00008348xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8349 /*
8350 * Is there any DTD definition ?
8351 */
8352 if (RAW == '[') {
8353 ctxt->instate = XML_PARSER_DTD;
8354 NEXT;
8355 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008356 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008357 * PEReferences.
8358 * Subsequence (markupdecl | PEReference | S)*
8359 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008360 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008361 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008362 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008363
8364 SKIP_BLANKS;
8365 xmlParseMarkupDecl(ctxt);
8366 xmlParsePEReference(ctxt);
8367
8368 /*
8369 * Pop-up of finished entities.
8370 */
8371 while ((RAW == 0) && (ctxt->inputNr > 1))
8372 xmlPopInput(ctxt);
8373
8374 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008375 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008376 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008377 break;
8378 }
8379 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008380 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008381 NEXT;
8382 SKIP_BLANKS;
8383 }
8384 }
8385
8386 /*
8387 * We should be at the end of the DOCTYPE declaration.
8388 */
8389 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008390 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008391 }
8392 NEXT;
8393}
8394
Daniel Veillard81273902003-09-30 00:43:48 +00008395#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008396/**
8397 * xmlParseAttribute:
8398 * @ctxt: an XML parser context
8399 * @value: a xmlChar ** used to store the value of the attribute
8400 *
8401 * parse an attribute
8402 *
8403 * [41] Attribute ::= Name Eq AttValue
8404 *
8405 * [ WFC: No External Entity References ]
8406 * Attribute values cannot contain direct or indirect entity references
8407 * to external entities.
8408 *
8409 * [ WFC: No < in Attribute Values ]
8410 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008411 * an attribute value (other than "&lt;") must not contain a <.
8412 *
Owen Taylor3473f882001-02-23 17:55:21 +00008413 * [ VC: Attribute Value Type ]
8414 * The attribute must have been declared; the value must be of the type
8415 * declared for it.
8416 *
8417 * [25] Eq ::= S? '=' S?
8418 *
8419 * With namespace:
8420 *
8421 * [NS 11] Attribute ::= QName Eq AttValue
8422 *
8423 * Also the case QName == xmlns:??? is handled independently as a namespace
8424 * definition.
8425 *
8426 * Returns the attribute name, and the value in *value.
8427 */
8428
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008429const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008430xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008431 const xmlChar *name;
8432 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008433
8434 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008435 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008436 name = xmlParseName(ctxt);
8437 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008438 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008439 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008440 return(NULL);
8441 }
8442
8443 /*
8444 * read the value
8445 */
8446 SKIP_BLANKS;
8447 if (RAW == '=') {
8448 NEXT;
8449 SKIP_BLANKS;
8450 val = xmlParseAttValue(ctxt);
8451 ctxt->instate = XML_PARSER_CONTENT;
8452 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008453 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008454 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008455 return(NULL);
8456 }
8457
8458 /*
8459 * Check that xml:lang conforms to the specification
8460 * No more registered as an error, just generate a warning now
8461 * since this was deprecated in XML second edition
8462 */
8463 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8464 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008465 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8466 "Malformed value for xml:lang : %s\n",
8467 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008468 }
8469 }
8470
8471 /*
8472 * Check that xml:space conforms to the specification
8473 */
8474 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8475 if (xmlStrEqual(val, BAD_CAST "default"))
8476 *(ctxt->space) = 0;
8477 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8478 *(ctxt->space) = 1;
8479 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008480 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008481"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008482 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008483 }
8484 }
8485
8486 *value = val;
8487 return(name);
8488}
8489
8490/**
8491 * xmlParseStartTag:
8492 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008493 *
Owen Taylor3473f882001-02-23 17:55:21 +00008494 * parse a start of tag either for rule element or
8495 * EmptyElement. In both case we don't parse the tag closing chars.
8496 *
8497 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8498 *
8499 * [ WFC: Unique Att Spec ]
8500 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008501 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008502 *
8503 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8504 *
8505 * [ WFC: Unique Att Spec ]
8506 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008507 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008508 *
8509 * With namespace:
8510 *
8511 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8512 *
8513 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8514 *
8515 * Returns the element name parsed
8516 */
8517
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008518const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008519xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008520 const xmlChar *name;
8521 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008522 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008523 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008524 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008525 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008526 int i;
8527
8528 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008529 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008530
8531 name = xmlParseName(ctxt);
8532 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008533 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008534 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008535 return(NULL);
8536 }
8537
8538 /*
8539 * Now parse the attributes, it ends up with the ending
8540 *
8541 * (S Attribute)* S?
8542 */
8543 SKIP_BLANKS;
8544 GROW;
8545
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008546 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008547 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008548 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008549 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008550 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008551
8552 attname = xmlParseAttribute(ctxt, &attvalue);
8553 if ((attname != NULL) && (attvalue != NULL)) {
8554 /*
8555 * [ WFC: Unique Att Spec ]
8556 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008557 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008558 */
8559 for (i = 0; i < nbatts;i += 2) {
8560 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008561 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008562 xmlFree(attvalue);
8563 goto failed;
8564 }
8565 }
Owen Taylor3473f882001-02-23 17:55:21 +00008566 /*
8567 * Add the pair to atts
8568 */
8569 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008570 maxatts = 22; /* allow for 10 attrs by default */
8571 atts = (const xmlChar **)
8572 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008573 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008574 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008575 if (attvalue != NULL)
8576 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008577 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008578 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008579 ctxt->atts = atts;
8580 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008581 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008582 const xmlChar **n;
8583
Owen Taylor3473f882001-02-23 17:55:21 +00008584 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008585 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008586 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008587 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008588 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008589 if (attvalue != NULL)
8590 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008591 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008592 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008593 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008594 ctxt->atts = atts;
8595 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008596 }
8597 atts[nbatts++] = attname;
8598 atts[nbatts++] = attvalue;
8599 atts[nbatts] = NULL;
8600 atts[nbatts + 1] = NULL;
8601 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008602 if (attvalue != NULL)
8603 xmlFree(attvalue);
8604 }
8605
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008606failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008607
Daniel Veillard3772de32002-12-17 10:31:45 +00008608 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008609 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8610 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008611 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008612 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8613 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008614 }
8615 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008616 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8617 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008618 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8619 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008620 break;
8621 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008622 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008623 GROW;
8624 }
8625
8626 /*
8627 * SAX: Start of Element !
8628 */
8629 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008630 (!ctxt->disableSAX)) {
8631 if (nbatts > 0)
8632 ctxt->sax->startElement(ctxt->userData, name, atts);
8633 else
8634 ctxt->sax->startElement(ctxt->userData, name, NULL);
8635 }
Owen Taylor3473f882001-02-23 17:55:21 +00008636
8637 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008638 /* Free only the content strings */
8639 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008640 if (atts[i] != NULL)
8641 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008642 }
8643 return(name);
8644}
8645
8646/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008647 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008648 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649 * @line: line of the start tag
8650 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008651 *
8652 * parse an end of tag
8653 *
8654 * [42] ETag ::= '</' Name S? '>'
8655 *
8656 * With namespace
8657 *
8658 * [NS 9] ETag ::= '</' QName S? '>'
8659 */
8660
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008661static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008662xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008663 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008664
8665 GROW;
8666 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008667 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008668 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008669 return;
8670 }
8671 SKIP(2);
8672
Daniel Veillard46de64e2002-05-29 08:21:33 +00008673 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008674
8675 /*
8676 * We should definitely be at the ending "S? '>'" part
8677 */
8678 GROW;
8679 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008680 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008681 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008682 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008683 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008684
8685 /*
8686 * [ WFC: Element Type Match ]
8687 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008688 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008689 *
8690 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008691 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008692 if (name == NULL) name = BAD_CAST "unparseable";
8693 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008694 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008695 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008696 }
8697
8698 /*
8699 * SAX: End of Tag
8700 */
8701 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8702 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008703 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008704
Daniel Veillarde57ec792003-09-10 10:50:59 +00008705 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008706 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008707 return;
8708}
8709
8710/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008711 * xmlParseEndTag:
8712 * @ctxt: an XML parser context
8713 *
8714 * parse an end of tag
8715 *
8716 * [42] ETag ::= '</' Name S? '>'
8717 *
8718 * With namespace
8719 *
8720 * [NS 9] ETag ::= '</' QName S? '>'
8721 */
8722
8723void
8724xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008725 xmlParseEndTag1(ctxt, 0);
8726}
Daniel Veillard81273902003-09-30 00:43:48 +00008727#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008728
8729/************************************************************************
8730 * *
8731 * SAX 2 specific operations *
8732 * *
8733 ************************************************************************/
8734
Daniel Veillard0fb18932003-09-07 09:14:37 +00008735/*
8736 * xmlGetNamespace:
8737 * @ctxt: an XML parser context
8738 * @prefix: the prefix to lookup
8739 *
8740 * Lookup the namespace name for the @prefix (which ca be NULL)
8741 * The prefix must come from the @ctxt->dict dictionnary
8742 *
8743 * Returns the namespace name or NULL if not bound
8744 */
8745static const xmlChar *
8746xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8747 int i;
8748
Daniel Veillarde57ec792003-09-10 10:50:59 +00008749 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008750 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008751 if (ctxt->nsTab[i] == prefix) {
8752 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8753 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008754 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008755 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756 return(NULL);
8757}
8758
8759/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008760 * xmlParseQName:
8761 * @ctxt: an XML parser context
8762 * @prefix: pointer to store the prefix part
8763 *
8764 * parse an XML Namespace QName
8765 *
8766 * [6] QName ::= (Prefix ':')? LocalPart
8767 * [7] Prefix ::= NCName
8768 * [8] LocalPart ::= NCName
8769 *
8770 * Returns the Name parsed or NULL
8771 */
8772
8773static const xmlChar *
8774xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8775 const xmlChar *l, *p;
8776
8777 GROW;
8778
8779 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008780 if (l == NULL) {
8781 if (CUR == ':') {
8782 l = xmlParseName(ctxt);
8783 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008784 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008785 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008786 *prefix = NULL;
8787 return(l);
8788 }
8789 }
8790 return(NULL);
8791 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008792 if (CUR == ':') {
8793 NEXT;
8794 p = l;
8795 l = xmlParseNCName(ctxt);
8796 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008797 xmlChar *tmp;
8798
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008799 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8800 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008801 l = xmlParseNmtoken(ctxt);
8802 if (l == NULL)
8803 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8804 else {
8805 tmp = xmlBuildQName(l, p, NULL, 0);
8806 xmlFree((char *)l);
8807 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008808 p = xmlDictLookup(ctxt->dict, tmp, -1);
8809 if (tmp != NULL) xmlFree(tmp);
8810 *prefix = NULL;
8811 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008812 }
8813 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008814 xmlChar *tmp;
8815
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008816 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8817 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008818 NEXT;
8819 tmp = (xmlChar *) xmlParseName(ctxt);
8820 if (tmp != NULL) {
8821 tmp = xmlBuildQName(tmp, l, NULL, 0);
8822 l = xmlDictLookup(ctxt->dict, tmp, -1);
8823 if (tmp != NULL) xmlFree(tmp);
8824 *prefix = p;
8825 return(l);
8826 }
8827 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8828 l = xmlDictLookup(ctxt->dict, tmp, -1);
8829 if (tmp != NULL) xmlFree(tmp);
8830 *prefix = p;
8831 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832 }
8833 *prefix = p;
8834 } else
8835 *prefix = NULL;
8836 return(l);
8837}
8838
8839/**
8840 * xmlParseQNameAndCompare:
8841 * @ctxt: an XML parser context
8842 * @name: the localname
8843 * @prefix: the prefix, if any.
8844 *
8845 * parse an XML name and compares for match
8846 * (specialized for endtag parsing)
8847 *
8848 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8849 * and the name for mismatch
8850 */
8851
8852static const xmlChar *
8853xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8854 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008855 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008856 const xmlChar *in;
8857 const xmlChar *ret;
8858 const xmlChar *prefix2;
8859
8860 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8861
8862 GROW;
8863 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008864
Daniel Veillard0fb18932003-09-07 09:14:37 +00008865 cmp = prefix;
8866 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008867 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008868 ++cmp;
8869 }
8870 if ((*cmp == 0) && (*in == ':')) {
8871 in++;
8872 cmp = name;
8873 while (*in != 0 && *in == *cmp) {
8874 ++in;
8875 ++cmp;
8876 }
William M. Brack76e95df2003-10-18 16:20:14 +00008877 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008878 /* success */
8879 ctxt->input->cur = in;
8880 return((const xmlChar*) 1);
8881 }
8882 }
8883 /*
8884 * all strings coms from the dictionary, equality can be done directly
8885 */
8886 ret = xmlParseQName (ctxt, &prefix2);
8887 if ((ret == name) && (prefix == prefix2))
8888 return((const xmlChar*) 1);
8889 return ret;
8890}
8891
8892/**
8893 * xmlParseAttValueInternal:
8894 * @ctxt: an XML parser context
8895 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008896 * @alloc: whether the attribute was reallocated as a new string
8897 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008898 *
8899 * parse a value for an attribute.
8900 * NOTE: if no normalization is needed, the routine will return pointers
8901 * directly from the data buffer.
8902 *
8903 * 3.3.3 Attribute-Value Normalization:
8904 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008905 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008906 * - a character reference is processed by appending the referenced
8907 * character to the attribute value
8908 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008909 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008910 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8911 * appending #x20 to the normalized value, except that only a single
8912 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008913 * parsed entity or the literal entity value of an internal parsed entity
8914 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008915 * If the declared value is not CDATA, then the XML processor must further
8916 * process the normalized attribute value by discarding any leading and
8917 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008918 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008919 * All attributes for which no declaration has been read should be treated
8920 * by a non-validating parser as if declared CDATA.
8921 *
8922 * Returns the AttValue parsed or NULL. The value has to be freed by the
8923 * caller if it was copied, this can be detected by val[*len] == 0.
8924 */
8925
8926static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008927xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8928 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008929{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008930 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008931 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008932 xmlChar *ret = NULL;
8933
8934 GROW;
8935 in = (xmlChar *) CUR_PTR;
8936 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008937 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008938 return (NULL);
8939 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008940 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008941
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008942 /*
8943 * try to handle in this routine the most common case where no
8944 * allocation of a new string is required and where content is
8945 * pure ASCII.
8946 */
8947 limit = *in++;
8948 end = ctxt->input->end;
8949 start = in;
8950 if (in >= end) {
8951 const xmlChar *oldbase = ctxt->input->base;
8952 GROW;
8953 if (oldbase != ctxt->input->base) {
8954 long delta = ctxt->input->base - oldbase;
8955 start = start + delta;
8956 in = in + delta;
8957 }
8958 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008959 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008960 if (normalize) {
8961 /*
8962 * Skip any leading spaces
8963 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008964 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008965 ((*in == 0x20) || (*in == 0x9) ||
8966 (*in == 0xA) || (*in == 0xD))) {
8967 in++;
8968 start = in;
8969 if (in >= end) {
8970 const xmlChar *oldbase = ctxt->input->base;
8971 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008972 if (ctxt->instate == XML_PARSER_EOF)
8973 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008974 if (oldbase != ctxt->input->base) {
8975 long delta = ctxt->input->base - oldbase;
8976 start = start + delta;
8977 in = in + delta;
8978 }
8979 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008980 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8981 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8982 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008983 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008984 return(NULL);
8985 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008986 }
8987 }
8988 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8989 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8990 if ((*in++ == 0x20) && (*in == 0x20)) break;
8991 if (in >= end) {
8992 const xmlChar *oldbase = ctxt->input->base;
8993 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008994 if (ctxt->instate == XML_PARSER_EOF)
8995 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008996 if (oldbase != ctxt->input->base) {
8997 long delta = ctxt->input->base - oldbase;
8998 start = start + delta;
8999 in = in + delta;
9000 }
9001 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009002 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9003 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9004 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009005 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009006 return(NULL);
9007 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009008 }
9009 }
9010 last = in;
9011 /*
9012 * skip the trailing blanks
9013 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009014 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009015 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009016 ((*in == 0x20) || (*in == 0x9) ||
9017 (*in == 0xA) || (*in == 0xD))) {
9018 in++;
9019 if (in >= end) {
9020 const xmlChar *oldbase = ctxt->input->base;
9021 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009022 if (ctxt->instate == XML_PARSER_EOF)
9023 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009024 if (oldbase != ctxt->input->base) {
9025 long delta = ctxt->input->base - oldbase;
9026 start = start + delta;
9027 in = in + delta;
9028 last = last + delta;
9029 }
9030 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009031 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9032 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9033 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009034 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009035 return(NULL);
9036 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009037 }
9038 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009039 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9040 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9041 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009042 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009043 return(NULL);
9044 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009045 if (*in != limit) goto need_complex;
9046 } else {
9047 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9048 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9049 in++;
9050 if (in >= end) {
9051 const xmlChar *oldbase = ctxt->input->base;
9052 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009053 if (ctxt->instate == XML_PARSER_EOF)
9054 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009055 if (oldbase != ctxt->input->base) {
9056 long delta = ctxt->input->base - oldbase;
9057 start = start + delta;
9058 in = in + delta;
9059 }
9060 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009061 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9062 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9063 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009064 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009065 return(NULL);
9066 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009067 }
9068 }
9069 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009070 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9071 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9072 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009073 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009074 return(NULL);
9075 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009076 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009077 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009078 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009079 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009080 *len = last - start;
9081 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009082 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009083 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009084 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009085 }
9086 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009087 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009088 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009089need_complex:
9090 if (alloc) *alloc = 1;
9091 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009092}
9093
9094/**
9095 * xmlParseAttribute2:
9096 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009097 * @pref: the element prefix
9098 * @elem: the element name
9099 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009100 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009101 * @len: an int * to save the length of the attribute
9102 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009103 *
9104 * parse an attribute in the new SAX2 framework.
9105 *
9106 * Returns the attribute name, and the value in *value, .
9107 */
9108
9109static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009110xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009111 const xmlChar * pref, const xmlChar * elem,
9112 const xmlChar ** prefix, xmlChar ** value,
9113 int *len, int *alloc)
9114{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009115 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009116 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009117 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009118
9119 *value = NULL;
9120 GROW;
9121 name = xmlParseQName(ctxt, prefix);
9122 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009123 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9124 "error parsing attribute name\n");
9125 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009126 }
9127
9128 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009129 * get the type if needed
9130 */
9131 if (ctxt->attsSpecial != NULL) {
9132 int type;
9133
9134 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009135 pref, elem, *prefix, name);
9136 if (type != 0)
9137 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009138 }
9139
9140 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009141 * read the value
9142 */
9143 SKIP_BLANKS;
9144 if (RAW == '=') {
9145 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009146 SKIP_BLANKS;
9147 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9148 if (normalize) {
9149 /*
9150 * Sometimes a second normalisation pass for spaces is needed
9151 * but that only happens if charrefs or entities refernces
9152 * have been used in the attribute value, i.e. the attribute
9153 * value have been extracted in an allocated string already.
9154 */
9155 if (*alloc) {
9156 const xmlChar *val2;
9157
9158 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009159 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009160 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009161 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009162 }
9163 }
9164 }
9165 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009166 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009167 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9168 "Specification mandate value for attribute %s\n",
9169 name);
9170 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009171 }
9172
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009173 if (*prefix == ctxt->str_xml) {
9174 /*
9175 * Check that xml:lang conforms to the specification
9176 * No more registered as an error, just generate a warning now
9177 * since this was deprecated in XML second edition
9178 */
9179 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9180 internal_val = xmlStrndup(val, *len);
9181 if (!xmlCheckLanguageID(internal_val)) {
9182 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9183 "Malformed value for xml:lang : %s\n",
9184 internal_val, NULL);
9185 }
9186 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009187
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009188 /*
9189 * Check that xml:space conforms to the specification
9190 */
9191 if (xmlStrEqual(name, BAD_CAST "space")) {
9192 internal_val = xmlStrndup(val, *len);
9193 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9194 *(ctxt->space) = 0;
9195 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9196 *(ctxt->space) = 1;
9197 else {
9198 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9199 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9200 internal_val, NULL);
9201 }
9202 }
9203 if (internal_val) {
9204 xmlFree(internal_val);
9205 }
9206 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009207
9208 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009209 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009210}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009211/**
9212 * xmlParseStartTag2:
9213 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009214 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009215 * parse a start of tag either for rule element or
9216 * EmptyElement. In both case we don't parse the tag closing chars.
9217 * This routine is called when running SAX2 parsing
9218 *
9219 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9220 *
9221 * [ WFC: Unique Att Spec ]
9222 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009223 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009224 *
9225 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9226 *
9227 * [ WFC: Unique Att Spec ]
9228 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009229 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009230 *
9231 * With namespace:
9232 *
9233 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9234 *
9235 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9236 *
9237 * Returns the element name parsed
9238 */
9239
9240static const xmlChar *
9241xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009242 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009243 const xmlChar *localname;
9244 const xmlChar *prefix;
9245 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009246 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009247 const xmlChar *nsname;
9248 xmlChar *attvalue;
9249 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009250 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009251 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009252 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009253 const xmlChar *base;
9254 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009255 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009256
9257 if (RAW != '<') return(NULL);
9258 NEXT1;
9259
9260 /*
9261 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9262 * point since the attribute values may be stored as pointers to
9263 * the buffer and calling SHRINK would destroy them !
9264 * The Shrinking is only possible once the full set of attribute
9265 * callbacks have been done.
9266 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009267reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009268 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009269 base = ctxt->input->base;
9270 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009271 oldline = ctxt->input->line;
9272 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009273 nbatts = 0;
9274 nratts = 0;
9275 nbdef = 0;
9276 nbNs = 0;
9277 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009278 /* Forget any namespaces added during an earlier parse of this element. */
9279 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009280
9281 localname = xmlParseQName(ctxt, &prefix);
9282 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009283 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9284 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009285 return(NULL);
9286 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009287 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009288
9289 /*
9290 * Now parse the attributes, it ends up with the ending
9291 *
9292 * (S Attribute)* S?
9293 */
9294 SKIP_BLANKS;
9295 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009296 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009297
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009298 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009299 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009300 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009301 const xmlChar *q = CUR_PTR;
9302 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009303 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009304
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009305 attname = xmlParseAttribute2(ctxt, prefix, localname,
9306 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009307 if (ctxt->input->base != base) {
9308 if ((attvalue != NULL) && (alloc != 0))
9309 xmlFree(attvalue);
9310 attvalue = NULL;
9311 goto base_changed;
9312 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009313 if ((attname != NULL) && (attvalue != NULL)) {
9314 if (len < 0) len = xmlStrlen(attvalue);
9315 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009316 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9317 xmlURIPtr uri;
9318
Daniel Veillardc836ba62014-07-14 16:39:50 +08009319 if (URL == NULL) {
9320 xmlErrMemory(ctxt, "dictionary allocation failure");
9321 if ((attvalue != NULL) && (alloc != 0))
9322 xmlFree(attvalue);
9323 return(NULL);
9324 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009325 if (*URL != 0) {
9326 uri = xmlParseURI((const char *) URL);
9327 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009328 xmlNsErr(ctxt, XML_WAR_NS_URI,
9329 "xmlns: '%s' is not a valid URI\n",
9330 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009331 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009332 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009333 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9334 "xmlns: URI %s is not absolute\n",
9335 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009336 }
9337 xmlFreeURI(uri);
9338 }
Daniel Veillard37334572008-07-31 08:20:02 +00009339 if (URL == ctxt->str_xml_ns) {
9340 if (attname != ctxt->str_xml) {
9341 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9342 "xml namespace URI cannot be the default namespace\n",
9343 NULL, NULL, NULL);
9344 }
9345 goto skip_default_ns;
9346 }
9347 if ((len == 29) &&
9348 (xmlStrEqual(URL,
9349 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9350 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9351 "reuse of the xmlns namespace name is forbidden\n",
9352 NULL, NULL, NULL);
9353 goto skip_default_ns;
9354 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009355 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009356 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009357 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009358 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009359 for (j = 1;j <= nbNs;j++)
9360 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9361 break;
9362 if (j <= nbNs)
9363 xmlErrAttributeDup(ctxt, NULL, attname);
9364 else
9365 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009366skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009367 if (alloc != 0) xmlFree(attvalue);
9368 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009369 continue;
9370 }
9371 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009372 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9373 xmlURIPtr uri;
9374
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009375 if (attname == ctxt->str_xml) {
9376 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009377 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9378 "xml namespace prefix mapped to wrong URI\n",
9379 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009380 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009381 /*
9382 * Do not keep a namespace definition node
9383 */
Daniel Veillard37334572008-07-31 08:20:02 +00009384 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009385 }
Daniel Veillard37334572008-07-31 08:20:02 +00009386 if (URL == ctxt->str_xml_ns) {
9387 if (attname != ctxt->str_xml) {
9388 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9389 "xml namespace URI mapped to wrong prefix\n",
9390 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009391 }
Daniel Veillard37334572008-07-31 08:20:02 +00009392 goto skip_ns;
9393 }
9394 if (attname == ctxt->str_xmlns) {
9395 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9396 "redefinition of the xmlns prefix is forbidden\n",
9397 NULL, NULL, NULL);
9398 goto skip_ns;
9399 }
9400 if ((len == 29) &&
9401 (xmlStrEqual(URL,
9402 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9403 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9404 "reuse of the xmlns namespace name is forbidden\n",
9405 NULL, NULL, NULL);
9406 goto skip_ns;
9407 }
9408 if ((URL == NULL) || (URL[0] == 0)) {
9409 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9410 "xmlns:%s: Empty XML namespace is not allowed\n",
9411 attname, NULL, NULL);
9412 goto skip_ns;
9413 } else {
9414 uri = xmlParseURI((const char *) URL);
9415 if (uri == NULL) {
9416 xmlNsErr(ctxt, XML_WAR_NS_URI,
9417 "xmlns:%s: '%s' is not a valid URI\n",
9418 attname, URL, NULL);
9419 } else {
9420 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9421 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9422 "xmlns:%s: URI %s is not absolute\n",
9423 attname, URL, NULL);
9424 }
9425 xmlFreeURI(uri);
9426 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009427 }
9428
Daniel Veillard0fb18932003-09-07 09:14:37 +00009429 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009430 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009431 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009432 for (j = 1;j <= nbNs;j++)
9433 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9434 break;
9435 if (j <= nbNs)
9436 xmlErrAttributeDup(ctxt, aprefix, attname);
9437 else
9438 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009439skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009440 if (alloc != 0) xmlFree(attvalue);
9441 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009442 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009443 continue;
9444 }
9445
9446 /*
9447 * Add the pair to atts
9448 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009449 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9450 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009451 if (attvalue[len] == 0)
9452 xmlFree(attvalue);
9453 goto failed;
9454 }
9455 maxatts = ctxt->maxatts;
9456 atts = ctxt->atts;
9457 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009458 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009459 atts[nbatts++] = attname;
9460 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009461 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009462 atts[nbatts++] = attvalue;
9463 attvalue += len;
9464 atts[nbatts++] = attvalue;
9465 /*
9466 * tag if some deallocation is needed
9467 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009468 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009469 } else {
9470 if ((attvalue != NULL) && (attvalue[len] == 0))
9471 xmlFree(attvalue);
9472 }
9473
Daniel Veillard37334572008-07-31 08:20:02 +00009474failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009475
9476 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009477 if (ctxt->instate == XML_PARSER_EOF)
9478 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009479 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009480 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9481 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009482 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009483 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9484 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009485 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009486 }
9487 SKIP_BLANKS;
9488 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9489 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009490 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009491 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009492 break;
9493 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009494 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009495 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009496 }
9497
Daniel Veillard0fb18932003-09-07 09:14:37 +00009498 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009499 * The attributes defaulting
9500 */
9501 if (ctxt->attsDefault != NULL) {
9502 xmlDefAttrsPtr defaults;
9503
9504 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9505 if (defaults != NULL) {
9506 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009507 attname = defaults->values[5 * i];
9508 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009509
9510 /*
9511 * special work for namespaces defaulted defs
9512 */
9513 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9514 /*
9515 * check that it's not a defined namespace
9516 */
9517 for (j = 1;j <= nbNs;j++)
9518 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9519 break;
9520 if (j <= nbNs) continue;
9521
9522 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009523 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009524 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009525 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009526 nbNs++;
9527 }
9528 } else if (aprefix == ctxt->str_xmlns) {
9529 /*
9530 * check that it's not a defined namespace
9531 */
9532 for (j = 1;j <= nbNs;j++)
9533 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9534 break;
9535 if (j <= nbNs) continue;
9536
9537 nsname = xmlGetNamespace(ctxt, attname);
9538 if (nsname != defaults->values[2]) {
9539 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009540 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009541 nbNs++;
9542 }
9543 } else {
9544 /*
9545 * check that it's not a defined attribute
9546 */
9547 for (j = 0;j < nbatts;j+=5) {
9548 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9549 break;
9550 }
9551 if (j < nbatts) continue;
9552
9553 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9554 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009555 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009556 }
9557 maxatts = ctxt->maxatts;
9558 atts = ctxt->atts;
9559 }
9560 atts[nbatts++] = attname;
9561 atts[nbatts++] = aprefix;
9562 if (aprefix == NULL)
9563 atts[nbatts++] = NULL;
9564 else
9565 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009566 atts[nbatts++] = defaults->values[5 * i + 2];
9567 atts[nbatts++] = defaults->values[5 * i + 3];
9568 if ((ctxt->standalone == 1) &&
9569 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009570 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009571 "standalone: attribute %s on %s defaulted from external subset\n",
9572 attname, localname);
9573 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009574 nbdef++;
9575 }
9576 }
9577 }
9578 }
9579
Daniel Veillarde70c8772003-11-25 07:21:18 +00009580 /*
9581 * The attributes checkings
9582 */
9583 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009584 /*
9585 * The default namespace does not apply to attribute names.
9586 */
9587 if (atts[i + 1] != NULL) {
9588 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9589 if (nsname == NULL) {
9590 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9591 "Namespace prefix %s for %s on %s is not defined\n",
9592 atts[i + 1], atts[i], localname);
9593 }
9594 atts[i + 2] = nsname;
9595 } else
9596 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009597 /*
9598 * [ WFC: Unique Att Spec ]
9599 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009600 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009601 * As extended by the Namespace in XML REC.
9602 */
9603 for (j = 0; j < i;j += 5) {
9604 if (atts[i] == atts[j]) {
9605 if (atts[i+1] == atts[j+1]) {
9606 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9607 break;
9608 }
9609 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9610 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9611 "Namespaced Attribute %s in '%s' redefined\n",
9612 atts[i], nsname, NULL);
9613 break;
9614 }
9615 }
9616 }
9617 }
9618
Daniel Veillarde57ec792003-09-10 10:50:59 +00009619 nsname = xmlGetNamespace(ctxt, prefix);
9620 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009621 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9622 "Namespace prefix %s on %s is not defined\n",
9623 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009624 }
9625 *pref = prefix;
9626 *URI = nsname;
9627
9628 /*
9629 * SAX: Start of Element !
9630 */
9631 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9632 (!ctxt->disableSAX)) {
9633 if (nbNs > 0)
9634 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9635 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9636 nbatts / 5, nbdef, atts);
9637 else
9638 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9639 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9640 }
9641
9642 /*
9643 * Free up attribute allocated strings if needed
9644 */
9645 if (attval != 0) {
9646 for (i = 3,j = 0; j < nratts;i += 5,j++)
9647 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9648 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009649 }
9650
9651 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009652
9653base_changed:
9654 /*
9655 * the attribute strings are valid iif the base didn't changed
9656 */
9657 if (attval != 0) {
9658 for (i = 3,j = 0; j < nratts;i += 5,j++)
9659 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9660 xmlFree((xmlChar *) atts[i]);
9661 }
9662 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009663 ctxt->input->line = oldline;
9664 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009665 if (ctxt->wellFormed == 1) {
9666 goto reparse;
9667 }
9668 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009669}
9670
9671/**
9672 * xmlParseEndTag2:
9673 * @ctxt: an XML parser context
9674 * @line: line of the start tag
9675 * @nsNr: number of namespaces on the start tag
9676 *
9677 * parse an end of tag
9678 *
9679 * [42] ETag ::= '</' Name S? '>'
9680 *
9681 * With namespace
9682 *
9683 * [NS 9] ETag ::= '</' QName S? '>'
9684 */
9685
9686static void
9687xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009688 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009689 const xmlChar *name;
9690
9691 GROW;
9692 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009693 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009694 return;
9695 }
9696 SKIP(2);
9697
William M. Brack13dfa872004-09-18 04:52:08 +00009698 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009699 if (ctxt->input->cur[tlen] == '>') {
9700 ctxt->input->cur += tlen + 1;
9701 goto done;
9702 }
9703 ctxt->input->cur += tlen;
9704 name = (xmlChar*)1;
9705 } else {
9706 if (prefix == NULL)
9707 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9708 else
9709 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9710 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009711
9712 /*
9713 * We should definitely be at the ending "S? '>'" part
9714 */
9715 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009716 if (ctxt->instate == XML_PARSER_EOF)
9717 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009718 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009719 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009720 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009721 } else
9722 NEXT1;
9723
9724 /*
9725 * [ WFC: Element Type Match ]
9726 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009727 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009728 *
9729 */
9730 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009731 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009732 if ((line == 0) && (ctxt->node != NULL))
9733 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009734 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009735 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009736 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009737 }
9738
9739 /*
9740 * SAX: End of Tag
9741 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009742done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009743 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9744 (!ctxt->disableSAX))
9745 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9746
Daniel Veillard0fb18932003-09-07 09:14:37 +00009747 spacePop(ctxt);
9748 if (nsNr != 0)
9749 nsPop(ctxt, nsNr);
9750 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009751}
9752
9753/**
Owen Taylor3473f882001-02-23 17:55:21 +00009754 * xmlParseCDSect:
9755 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009756 *
Owen Taylor3473f882001-02-23 17:55:21 +00009757 * Parse escaped pure raw content.
9758 *
9759 * [18] CDSect ::= CDStart CData CDEnd
9760 *
9761 * [19] CDStart ::= '<![CDATA['
9762 *
9763 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9764 *
9765 * [21] CDEnd ::= ']]>'
9766 */
9767void
9768xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9769 xmlChar *buf = NULL;
9770 int len = 0;
9771 int size = XML_PARSER_BUFFER_SIZE;
9772 int r, rl;
9773 int s, sl;
9774 int cur, l;
9775 int count = 0;
9776
Daniel Veillard8f597c32003-10-06 08:19:27 +00009777 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009778 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009779 SKIP(9);
9780 } else
9781 return;
9782
9783 ctxt->instate = XML_PARSER_CDATA_SECTION;
9784 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009785 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009786 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009787 ctxt->instate = XML_PARSER_CONTENT;
9788 return;
9789 }
9790 NEXTL(rl);
9791 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009792 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009793 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009794 ctxt->instate = XML_PARSER_CONTENT;
9795 return;
9796 }
9797 NEXTL(sl);
9798 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009799 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009800 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009801 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009802 return;
9803 }
William M. Brack871611b2003-10-18 04:53:14 +00009804 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009805 ((r != ']') || (s != ']') || (cur != '>'))) {
9806 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009807 xmlChar *tmp;
9808
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009809 if ((size > XML_MAX_TEXT_LENGTH) &&
9810 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9811 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9812 "CData section too big found", NULL);
9813 xmlFree (buf);
9814 return;
9815 }
9816 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009817 if (tmp == NULL) {
9818 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009819 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009820 return;
9821 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009822 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009823 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009824 }
9825 COPY_BUF(rl,buf,len,r);
9826 r = s;
9827 rl = sl;
9828 s = cur;
9829 sl = l;
9830 count++;
9831 if (count > 50) {
9832 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009833 if (ctxt->instate == XML_PARSER_EOF) {
9834 xmlFree(buf);
9835 return;
9836 }
Owen Taylor3473f882001-02-23 17:55:21 +00009837 count = 0;
9838 }
9839 NEXTL(l);
9840 cur = CUR_CHAR(l);
9841 }
9842 buf[len] = 0;
9843 ctxt->instate = XML_PARSER_CONTENT;
9844 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009845 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009846 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009847 xmlFree(buf);
9848 return;
9849 }
9850 NEXTL(l);
9851
9852 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009853 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009854 */
9855 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9856 if (ctxt->sax->cdataBlock != NULL)
9857 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009858 else if (ctxt->sax->characters != NULL)
9859 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009860 }
9861 xmlFree(buf);
9862}
9863
9864/**
9865 * xmlParseContent:
9866 * @ctxt: an XML parser context
9867 *
9868 * Parse a content:
9869 *
9870 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9871 */
9872
9873void
9874xmlParseContent(xmlParserCtxtPtr ctxt) {
9875 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009876 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009877 ((RAW != '<') || (NXT(1) != '/')) &&
9878 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009879 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009880 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009881 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009882
9883 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009884 * First case : a Processing Instruction.
9885 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009886 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009887 xmlParsePI(ctxt);
9888 }
9889
9890 /*
9891 * Second case : a CDSection
9892 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009893 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009894 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009895 xmlParseCDSect(ctxt);
9896 }
9897
9898 /*
9899 * Third case : a comment
9900 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009901 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009902 (NXT(2) == '-') && (NXT(3) == '-')) {
9903 xmlParseComment(ctxt);
9904 ctxt->instate = XML_PARSER_CONTENT;
9905 }
9906
9907 /*
9908 * Fourth case : a sub-element.
9909 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009910 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009911 xmlParseElement(ctxt);
9912 }
9913
9914 /*
9915 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009916 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009917 */
9918
Daniel Veillard21a0f912001-02-25 19:54:14 +00009919 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009920 xmlParseReference(ctxt);
9921 }
9922
9923 /*
9924 * Last case, text. Note that References are handled directly.
9925 */
9926 else {
9927 xmlParseCharData(ctxt, 0);
9928 }
9929
9930 GROW;
9931 /*
9932 * Pop-up of finished entities.
9933 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009934 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009935 xmlPopInput(ctxt);
9936 SHRINK;
9937
Daniel Veillardfdc91562002-07-01 21:52:03 +00009938 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009939 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9940 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009941 ctxt->instate = XML_PARSER_EOF;
9942 break;
9943 }
9944 }
9945}
9946
9947/**
9948 * xmlParseElement:
9949 * @ctxt: an XML parser context
9950 *
9951 * parse an XML element, this is highly recursive
9952 *
9953 * [39] element ::= EmptyElemTag | STag content ETag
9954 *
9955 * [ WFC: Element Type Match ]
9956 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009957 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009958 *
Owen Taylor3473f882001-02-23 17:55:21 +00009959 */
9960
9961void
9962xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009963 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009964 const xmlChar *prefix = NULL;
9965 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009966 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009967 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009968 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009969 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009970
Daniel Veillard8915c152008-08-26 13:05:34 +00009971 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9972 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9973 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9974 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9975 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009976 ctxt->instate = XML_PARSER_EOF;
9977 return;
9978 }
9979
Owen Taylor3473f882001-02-23 17:55:21 +00009980 /* Capture start position */
9981 if (ctxt->record_info) {
9982 node_info.begin_pos = ctxt->input->consumed +
9983 (CUR_PTR - ctxt->input->base);
9984 node_info.begin_line = ctxt->input->line;
9985 }
9986
9987 if (ctxt->spaceNr == 0)
9988 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009989 else if (*ctxt->space == -2)
9990 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009991 else
9992 spacePush(ctxt, *ctxt->space);
9993
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009994 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009995#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009996 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009997#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009998 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009999#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010000 else
10001 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010002#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010003 if (ctxt->instate == XML_PARSER_EOF)
10004 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010005 if (name == NULL) {
10006 spacePop(ctxt);
10007 return;
10008 }
10009 namePush(ctxt, name);
10010 ret = ctxt->node;
10011
Daniel Veillard4432df22003-09-28 18:58:27 +000010012#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010013 /*
10014 * [ VC: Root Element Type ]
10015 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010016 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010017 */
10018 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10019 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10020 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010021#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010022
10023 /*
10024 * Check for an Empty Element.
10025 */
10026 if ((RAW == '/') && (NXT(1) == '>')) {
10027 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010028 if (ctxt->sax2) {
10029 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10030 (!ctxt->disableSAX))
10031 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010032#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010033 } else {
10034 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10035 (!ctxt->disableSAX))
10036 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010037#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010038 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010039 namePop(ctxt);
10040 spacePop(ctxt);
10041 if (nsNr != ctxt->nsNr)
10042 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010043 if ( ret != NULL && ctxt->record_info ) {
10044 node_info.end_pos = ctxt->input->consumed +
10045 (CUR_PTR - ctxt->input->base);
10046 node_info.end_line = ctxt->input->line;
10047 node_info.node = ret;
10048 xmlParserAddNodeInfo(ctxt, &node_info);
10049 }
10050 return;
10051 }
10052 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010053 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010054 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010055 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10056 "Couldn't find end of Start Tag %s line %d\n",
10057 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010058
10059 /*
10060 * end of parsing of this node.
10061 */
10062 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010063 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010064 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010065 if (nsNr != ctxt->nsNr)
10066 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010067
10068 /*
10069 * Capture end position and add node
10070 */
10071 if ( ret != NULL && ctxt->record_info ) {
10072 node_info.end_pos = ctxt->input->consumed +
10073 (CUR_PTR - ctxt->input->base);
10074 node_info.end_line = ctxt->input->line;
10075 node_info.node = ret;
10076 xmlParserAddNodeInfo(ctxt, &node_info);
10077 }
10078 return;
10079 }
10080
10081 /*
10082 * Parse the content of the element:
10083 */
10084 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010085 if (ctxt->instate == XML_PARSER_EOF)
10086 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010087 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010088 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010089 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010090 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010091
10092 /*
10093 * end of parsing of this node.
10094 */
10095 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010096 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010097 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010098 if (nsNr != ctxt->nsNr)
10099 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010100 return;
10101 }
10102
10103 /*
10104 * parse the end of tag: '</' should be here.
10105 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010106 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010107 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010108 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010109 }
10110#ifdef LIBXML_SAX1_ENABLED
10111 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010112 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010113#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010114
10115 /*
10116 * Capture end position and add node
10117 */
10118 if ( ret != NULL && ctxt->record_info ) {
10119 node_info.end_pos = ctxt->input->consumed +
10120 (CUR_PTR - ctxt->input->base);
10121 node_info.end_line = ctxt->input->line;
10122 node_info.node = ret;
10123 xmlParserAddNodeInfo(ctxt, &node_info);
10124 }
10125}
10126
10127/**
10128 * xmlParseVersionNum:
10129 * @ctxt: an XML parser context
10130 *
10131 * parse the XML version value.
10132 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010133 * [26] VersionNum ::= '1.' [0-9]+
10134 *
10135 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010136 *
10137 * Returns the string giving the XML version number, or NULL
10138 */
10139xmlChar *
10140xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10141 xmlChar *buf = NULL;
10142 int len = 0;
10143 int size = 10;
10144 xmlChar cur;
10145
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010146 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010147 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010148 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010149 return(NULL);
10150 }
10151 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010152 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010153 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010154 return(NULL);
10155 }
10156 buf[len++] = cur;
10157 NEXT;
10158 cur=CUR;
10159 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010160 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010161 return(NULL);
10162 }
10163 buf[len++] = cur;
10164 NEXT;
10165 cur=CUR;
10166 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010167 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010168 xmlChar *tmp;
10169
Owen Taylor3473f882001-02-23 17:55:21 +000010170 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010171 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10172 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010173 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010174 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010175 return(NULL);
10176 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010177 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010178 }
10179 buf[len++] = cur;
10180 NEXT;
10181 cur=CUR;
10182 }
10183 buf[len] = 0;
10184 return(buf);
10185}
10186
10187/**
10188 * xmlParseVersionInfo:
10189 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010190 *
Owen Taylor3473f882001-02-23 17:55:21 +000010191 * parse the XML version.
10192 *
10193 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010194 *
Owen Taylor3473f882001-02-23 17:55:21 +000010195 * [25] Eq ::= S? '=' S?
10196 *
10197 * Returns the version string, e.g. "1.0"
10198 */
10199
10200xmlChar *
10201xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10202 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010203
Daniel Veillarda07050d2003-10-19 14:46:32 +000010204 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010205 SKIP(7);
10206 SKIP_BLANKS;
10207 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010208 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010209 return(NULL);
10210 }
10211 NEXT;
10212 SKIP_BLANKS;
10213 if (RAW == '"') {
10214 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010215 version = xmlParseVersionNum(ctxt);
10216 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010217 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010218 } else
10219 NEXT;
10220 } else if (RAW == '\''){
10221 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010222 version = xmlParseVersionNum(ctxt);
10223 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010224 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010225 } else
10226 NEXT;
10227 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010228 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010229 }
10230 }
10231 return(version);
10232}
10233
10234/**
10235 * xmlParseEncName:
10236 * @ctxt: an XML parser context
10237 *
10238 * parse the XML encoding name
10239 *
10240 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10241 *
10242 * Returns the encoding name value or NULL
10243 */
10244xmlChar *
10245xmlParseEncName(xmlParserCtxtPtr ctxt) {
10246 xmlChar *buf = NULL;
10247 int len = 0;
10248 int size = 10;
10249 xmlChar cur;
10250
10251 cur = CUR;
10252 if (((cur >= 'a') && (cur <= 'z')) ||
10253 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010254 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010255 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010256 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010257 return(NULL);
10258 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010259
Owen Taylor3473f882001-02-23 17:55:21 +000010260 buf[len++] = cur;
10261 NEXT;
10262 cur = CUR;
10263 while (((cur >= 'a') && (cur <= 'z')) ||
10264 ((cur >= 'A') && (cur <= 'Z')) ||
10265 ((cur >= '0') && (cur <= '9')) ||
10266 (cur == '.') || (cur == '_') ||
10267 (cur == '-')) {
10268 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010269 xmlChar *tmp;
10270
Owen Taylor3473f882001-02-23 17:55:21 +000010271 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010272 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10273 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010274 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010275 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010276 return(NULL);
10277 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010278 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010279 }
10280 buf[len++] = cur;
10281 NEXT;
10282 cur = CUR;
10283 if (cur == 0) {
10284 SHRINK;
10285 GROW;
10286 cur = CUR;
10287 }
10288 }
10289 buf[len] = 0;
10290 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010291 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010292 }
10293 return(buf);
10294}
10295
10296/**
10297 * xmlParseEncodingDecl:
10298 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010299 *
Owen Taylor3473f882001-02-23 17:55:21 +000010300 * parse the XML encoding declaration
10301 *
10302 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10303 *
10304 * this setups the conversion filters.
10305 *
10306 * Returns the encoding value or NULL
10307 */
10308
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010309const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010310xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10311 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010312
10313 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010314 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010315 SKIP(8);
10316 SKIP_BLANKS;
10317 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010318 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010319 return(NULL);
10320 }
10321 NEXT;
10322 SKIP_BLANKS;
10323 if (RAW == '"') {
10324 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010325 encoding = xmlParseEncName(ctxt);
10326 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010327 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010328 } else
10329 NEXT;
10330 } else if (RAW == '\''){
10331 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010332 encoding = xmlParseEncName(ctxt);
10333 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010334 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010335 } else
10336 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010337 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010338 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010339 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010340
10341 /*
10342 * Non standard parsing, allowing the user to ignore encoding
10343 */
10344 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10345 return(encoding);
10346
Daniel Veillard6b621b82003-08-11 15:03:34 +000010347 /*
10348 * UTF-16 encoding stwich has already taken place at this stage,
10349 * more over the little-endian/big-endian selection is already done
10350 */
10351 if ((encoding != NULL) &&
10352 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10353 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010354 /*
10355 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010356 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010357 * document is apparently UTF-8 compatible, then raise an
10358 * encoding mismatch fatal error
10359 */
10360 if ((ctxt->encoding == NULL) &&
10361 (ctxt->input->buf != NULL) &&
10362 (ctxt->input->buf->encoder == NULL)) {
10363 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10364 "Document labelled UTF-16 but has UTF-8 content\n");
10365 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010366 if (ctxt->encoding != NULL)
10367 xmlFree((xmlChar *) ctxt->encoding);
10368 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010369 }
10370 /*
10371 * UTF-8 encoding is handled natively
10372 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010373 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010374 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10375 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010376 if (ctxt->encoding != NULL)
10377 xmlFree((xmlChar *) ctxt->encoding);
10378 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010379 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010380 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010381 xmlCharEncodingHandlerPtr handler;
10382
10383 if (ctxt->input->encoding != NULL)
10384 xmlFree((xmlChar *) ctxt->input->encoding);
10385 ctxt->input->encoding = encoding;
10386
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010387 handler = xmlFindCharEncodingHandler((const char *) encoding);
10388 if (handler != NULL) {
10389 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010390 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010391 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010392 "Unsupported encoding %s\n", encoding);
10393 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010394 }
10395 }
10396 }
10397 return(encoding);
10398}
10399
10400/**
10401 * xmlParseSDDecl:
10402 * @ctxt: an XML parser context
10403 *
10404 * parse the XML standalone declaration
10405 *
10406 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010407 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010408 *
10409 * [ VC: Standalone Document Declaration ]
10410 * TODO The standalone document declaration must have the value "no"
10411 * if any external markup declarations contain declarations of:
10412 * - attributes with default values, if elements to which these
10413 * attributes apply appear in the document without specifications
10414 * of values for these attributes, or
10415 * - entities (other than amp, lt, gt, apos, quot), if references
10416 * to those entities appear in the document, or
10417 * - attributes with values subject to normalization, where the
10418 * attribute appears in the document with a value which will change
10419 * as a result of normalization, or
10420 * - element types with element content, if white space occurs directly
10421 * within any instance of those types.
10422 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010423 * Returns:
10424 * 1 if standalone="yes"
10425 * 0 if standalone="no"
10426 * -2 if standalone attribute is missing or invalid
10427 * (A standalone value of -2 means that the XML declaration was found,
10428 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010429 */
10430
10431int
10432xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010433 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010434
10435 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010436 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010437 SKIP(10);
10438 SKIP_BLANKS;
10439 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010440 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010441 return(standalone);
10442 }
10443 NEXT;
10444 SKIP_BLANKS;
10445 if (RAW == '\''){
10446 NEXT;
10447 if ((RAW == 'n') && (NXT(1) == 'o')) {
10448 standalone = 0;
10449 SKIP(2);
10450 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10451 (NXT(2) == 's')) {
10452 standalone = 1;
10453 SKIP(3);
10454 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010455 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010456 }
10457 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010458 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010459 } else
10460 NEXT;
10461 } else if (RAW == '"'){
10462 NEXT;
10463 if ((RAW == 'n') && (NXT(1) == 'o')) {
10464 standalone = 0;
10465 SKIP(2);
10466 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10467 (NXT(2) == 's')) {
10468 standalone = 1;
10469 SKIP(3);
10470 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010471 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010472 }
10473 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010474 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 } else
10476 NEXT;
10477 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010478 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010479 }
10480 }
10481 return(standalone);
10482}
10483
10484/**
10485 * xmlParseXMLDecl:
10486 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010487 *
Owen Taylor3473f882001-02-23 17:55:21 +000010488 * parse an XML declaration header
10489 *
10490 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10491 */
10492
10493void
10494xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10495 xmlChar *version;
10496
10497 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010498 * This value for standalone indicates that the document has an
10499 * XML declaration but it does not have a standalone attribute.
10500 * It will be overwritten later if a standalone attribute is found.
10501 */
10502 ctxt->input->standalone = -2;
10503
10504 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010505 * We know that '<?xml' is here.
10506 */
10507 SKIP(5);
10508
William M. Brack76e95df2003-10-18 16:20:14 +000010509 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010510 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10511 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010512 }
10513 SKIP_BLANKS;
10514
10515 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010516 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010517 */
10518 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010519 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010520 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010521 } else {
10522 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10523 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010524 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010525 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010526 if (ctxt->options & XML_PARSE_OLD10) {
10527 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10528 "Unsupported version '%s'\n",
10529 version);
10530 } else {
10531 if ((version[0] == '1') && ((version[1] == '.'))) {
10532 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10533 "Unsupported version '%s'\n",
10534 version, NULL);
10535 } else {
10536 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10537 "Unsupported version '%s'\n",
10538 version);
10539 }
10540 }
Daniel Veillard19840942001-11-29 16:11:38 +000010541 }
10542 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010543 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010544 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010545 }
Owen Taylor3473f882001-02-23 17:55:21 +000010546
10547 /*
10548 * We may have the encoding declaration
10549 */
William M. Brack76e95df2003-10-18 16:20:14 +000010550 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010551 if ((RAW == '?') && (NXT(1) == '>')) {
10552 SKIP(2);
10553 return;
10554 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010555 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010556 }
10557 xmlParseEncodingDecl(ctxt);
10558 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10559 /*
10560 * The XML REC instructs us to stop parsing right here
10561 */
10562 return;
10563 }
10564
10565 /*
10566 * We may have the standalone status.
10567 */
William M. Brack76e95df2003-10-18 16:20:14 +000010568 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010569 if ((RAW == '?') && (NXT(1) == '>')) {
10570 SKIP(2);
10571 return;
10572 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010573 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010574 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010575
10576 /*
10577 * We can grow the input buffer freely at that point
10578 */
10579 GROW;
10580
Owen Taylor3473f882001-02-23 17:55:21 +000010581 SKIP_BLANKS;
10582 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10583
10584 SKIP_BLANKS;
10585 if ((RAW == '?') && (NXT(1) == '>')) {
10586 SKIP(2);
10587 } else if (RAW == '>') {
10588 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010589 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010590 NEXT;
10591 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010592 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010593 MOVETO_ENDTAG(CUR_PTR);
10594 NEXT;
10595 }
10596}
10597
10598/**
10599 * xmlParseMisc:
10600 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010601 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010602 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010603 *
10604 * [27] Misc ::= Comment | PI | S
10605 */
10606
10607void
10608xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010609 while ((ctxt->instate != XML_PARSER_EOF) &&
10610 (((RAW == '<') && (NXT(1) == '?')) ||
10611 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10612 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010613 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010614 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010615 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010616 NEXT;
10617 } else
10618 xmlParseComment(ctxt);
10619 }
10620}
10621
10622/**
10623 * xmlParseDocument:
10624 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010625 *
Owen Taylor3473f882001-02-23 17:55:21 +000010626 * parse an XML document (and build a tree if using the standard SAX
10627 * interface).
10628 *
10629 * [1] document ::= prolog element Misc*
10630 *
10631 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10632 *
10633 * Returns 0, -1 in case of error. the parser context is augmented
10634 * as a result of the parsing.
10635 */
10636
10637int
10638xmlParseDocument(xmlParserCtxtPtr ctxt) {
10639 xmlChar start[4];
10640 xmlCharEncoding enc;
10641
10642 xmlInitParser();
10643
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010644 if ((ctxt == NULL) || (ctxt->input == NULL))
10645 return(-1);
10646
Owen Taylor3473f882001-02-23 17:55:21 +000010647 GROW;
10648
10649 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010650 * SAX: detecting the level.
10651 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010652 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010653
10654 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010655 * SAX: beginning of the document processing.
10656 */
10657 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10658 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010659 if (ctxt->instate == XML_PARSER_EOF)
10660 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010661
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010662 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010663 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010664 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010665 * Get the 4 first bytes and decode the charset
10666 * if enc != XML_CHAR_ENCODING_NONE
10667 * plug some encoding conversion routines.
10668 */
10669 start[0] = RAW;
10670 start[1] = NXT(1);
10671 start[2] = NXT(2);
10672 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010673 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010674 if (enc != XML_CHAR_ENCODING_NONE) {
10675 xmlSwitchEncoding(ctxt, enc);
10676 }
Owen Taylor3473f882001-02-23 17:55:21 +000010677 }
10678
10679
10680 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010681 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010682 }
10683
10684 /*
10685 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010686 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010687 * than just the first line, unless the amount of data is really
10688 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010689 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010690 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10691 GROW;
10692 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010693 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010694
10695 /*
10696 * Note that we will switch encoding on the fly.
10697 */
10698 xmlParseXMLDecl(ctxt);
10699 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10700 /*
10701 * The XML REC instructs us to stop parsing right here
10702 */
10703 return(-1);
10704 }
10705 ctxt->standalone = ctxt->input->standalone;
10706 SKIP_BLANKS;
10707 } else {
10708 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10709 }
10710 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10711 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010712 if (ctxt->instate == XML_PARSER_EOF)
10713 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010714 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10715 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10716 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10717 }
Owen Taylor3473f882001-02-23 17:55:21 +000010718
10719 /*
10720 * The Misc part of the Prolog
10721 */
10722 GROW;
10723 xmlParseMisc(ctxt);
10724
10725 /*
10726 * Then possibly doc type declaration(s) and more Misc
10727 * (doctypedecl Misc*)?
10728 */
10729 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010730 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010731
10732 ctxt->inSubset = 1;
10733 xmlParseDocTypeDecl(ctxt);
10734 if (RAW == '[') {
10735 ctxt->instate = XML_PARSER_DTD;
10736 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010737 if (ctxt->instate == XML_PARSER_EOF)
10738 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010739 }
10740
10741 /*
10742 * Create and update the external subset.
10743 */
10744 ctxt->inSubset = 2;
10745 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10746 (!ctxt->disableSAX))
10747 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10748 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010749 if (ctxt->instate == XML_PARSER_EOF)
10750 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010751 ctxt->inSubset = 0;
10752
Daniel Veillardac4118d2008-01-11 05:27:32 +000010753 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010754
10755 ctxt->instate = XML_PARSER_PROLOG;
10756 xmlParseMisc(ctxt);
10757 }
10758
10759 /*
10760 * Time to start parsing the tree itself
10761 */
10762 GROW;
10763 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010764 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10765 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010766 } else {
10767 ctxt->instate = XML_PARSER_CONTENT;
10768 xmlParseElement(ctxt);
10769 ctxt->instate = XML_PARSER_EPILOG;
10770
10771
10772 /*
10773 * The Misc part at the end
10774 */
10775 xmlParseMisc(ctxt);
10776
Daniel Veillard561b7f82002-03-20 21:55:57 +000010777 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010778 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010779 }
10780 ctxt->instate = XML_PARSER_EOF;
10781 }
10782
10783 /*
10784 * SAX: end of the document processing.
10785 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010786 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010787 ctxt->sax->endDocument(ctxt->userData);
10788
Daniel Veillard5997aca2002-03-18 18:36:20 +000010789 /*
10790 * Remove locally kept entity definitions if the tree was not built
10791 */
10792 if ((ctxt->myDoc != NULL) &&
10793 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10794 xmlFreeDoc(ctxt->myDoc);
10795 ctxt->myDoc = NULL;
10796 }
10797
Daniel Veillardae0765b2008-07-31 19:54:59 +000010798 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10799 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10800 if (ctxt->valid)
10801 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10802 if (ctxt->nsWellFormed)
10803 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10804 if (ctxt->options & XML_PARSE_OLD10)
10805 ctxt->myDoc->properties |= XML_DOC_OLD10;
10806 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010807 if (! ctxt->wellFormed) {
10808 ctxt->valid = 0;
10809 return(-1);
10810 }
Owen Taylor3473f882001-02-23 17:55:21 +000010811 return(0);
10812}
10813
10814/**
10815 * xmlParseExtParsedEnt:
10816 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010817 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010818 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010819 * An external general parsed entity is well-formed if it matches the
10820 * production labeled extParsedEnt.
10821 *
10822 * [78] extParsedEnt ::= TextDecl? content
10823 *
10824 * Returns 0, -1 in case of error. the parser context is augmented
10825 * as a result of the parsing.
10826 */
10827
10828int
10829xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10830 xmlChar start[4];
10831 xmlCharEncoding enc;
10832
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010833 if ((ctxt == NULL) || (ctxt->input == NULL))
10834 return(-1);
10835
Owen Taylor3473f882001-02-23 17:55:21 +000010836 xmlDefaultSAXHandlerInit();
10837
Daniel Veillard309f81d2003-09-23 09:02:53 +000010838 xmlDetectSAX2(ctxt);
10839
Owen Taylor3473f882001-02-23 17:55:21 +000010840 GROW;
10841
10842 /*
10843 * SAX: beginning of the document processing.
10844 */
10845 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10846 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10847
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010848 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010849 * Get the 4 first bytes and decode the charset
10850 * if enc != XML_CHAR_ENCODING_NONE
10851 * plug some encoding conversion routines.
10852 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010853 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10854 start[0] = RAW;
10855 start[1] = NXT(1);
10856 start[2] = NXT(2);
10857 start[3] = NXT(3);
10858 enc = xmlDetectCharEncoding(start, 4);
10859 if (enc != XML_CHAR_ENCODING_NONE) {
10860 xmlSwitchEncoding(ctxt, enc);
10861 }
Owen Taylor3473f882001-02-23 17:55:21 +000010862 }
10863
10864
10865 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010866 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010867 }
10868
10869 /*
10870 * Check for the XMLDecl in the Prolog.
10871 */
10872 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010873 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010874
10875 /*
10876 * Note that we will switch encoding on the fly.
10877 */
10878 xmlParseXMLDecl(ctxt);
10879 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10880 /*
10881 * The XML REC instructs us to stop parsing right here
10882 */
10883 return(-1);
10884 }
10885 SKIP_BLANKS;
10886 } else {
10887 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10888 }
10889 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10890 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010891 if (ctxt->instate == XML_PARSER_EOF)
10892 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010893
10894 /*
10895 * Doing validity checking on chunk doesn't make sense
10896 */
10897 ctxt->instate = XML_PARSER_CONTENT;
10898 ctxt->validate = 0;
10899 ctxt->loadsubset = 0;
10900 ctxt->depth = 0;
10901
10902 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010903 if (ctxt->instate == XML_PARSER_EOF)
10904 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010905
Owen Taylor3473f882001-02-23 17:55:21 +000010906 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010907 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010908 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010909 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010910 }
10911
10912 /*
10913 * SAX: end of the document processing.
10914 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010915 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010916 ctxt->sax->endDocument(ctxt->userData);
10917
10918 if (! ctxt->wellFormed) return(-1);
10919 return(0);
10920}
10921
Daniel Veillard73b013f2003-09-30 12:36:01 +000010922#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010923/************************************************************************
10924 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010925 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010926 * *
10927 ************************************************************************/
10928
10929/**
10930 * xmlParseLookupSequence:
10931 * @ctxt: an XML parser context
10932 * @first: the first char to lookup
10933 * @next: the next char to lookup or zero
10934 * @third: the next char to lookup or zero
10935 *
10936 * Try to find if a sequence (first, next, third) or just (first next) or
10937 * (first) is available in the input stream.
10938 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10939 * to avoid rescanning sequences of bytes, it DOES change the state of the
10940 * parser, do not use liberally.
10941 *
10942 * Returns the index to the current parsing point if the full sequence
10943 * is available, -1 otherwise.
10944 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010945static int
Owen Taylor3473f882001-02-23 17:55:21 +000010946xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10947 xmlChar next, xmlChar third) {
10948 int base, len;
10949 xmlParserInputPtr in;
10950 const xmlChar *buf;
10951
10952 in = ctxt->input;
10953 if (in == NULL) return(-1);
10954 base = in->cur - in->base;
10955 if (base < 0) return(-1);
10956 if (ctxt->checkIndex > base)
10957 base = ctxt->checkIndex;
10958 if (in->buf == NULL) {
10959 buf = in->base;
10960 len = in->length;
10961 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010962 buf = xmlBufContent(in->buf->buffer);
10963 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010964 }
10965 /* take into account the sequence length */
10966 if (third) len -= 2;
10967 else if (next) len --;
10968 for (;base < len;base++) {
10969 if (buf[base] == first) {
10970 if (third != 0) {
10971 if ((buf[base + 1] != next) ||
10972 (buf[base + 2] != third)) continue;
10973 } else if (next != 0) {
10974 if (buf[base + 1] != next) continue;
10975 }
10976 ctxt->checkIndex = 0;
10977#ifdef DEBUG_PUSH
10978 if (next == 0)
10979 xmlGenericError(xmlGenericErrorContext,
10980 "PP: lookup '%c' found at %d\n",
10981 first, base);
10982 else if (third == 0)
10983 xmlGenericError(xmlGenericErrorContext,
10984 "PP: lookup '%c%c' found at %d\n",
10985 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010986 else
Owen Taylor3473f882001-02-23 17:55:21 +000010987 xmlGenericError(xmlGenericErrorContext,
10988 "PP: lookup '%c%c%c' found at %d\n",
10989 first, next, third, base);
10990#endif
10991 return(base - (in->cur - in->base));
10992 }
10993 }
10994 ctxt->checkIndex = base;
10995#ifdef DEBUG_PUSH
10996 if (next == 0)
10997 xmlGenericError(xmlGenericErrorContext,
10998 "PP: lookup '%c' failed\n", first);
10999 else if (third == 0)
11000 xmlGenericError(xmlGenericErrorContext,
11001 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011002 else
Owen Taylor3473f882001-02-23 17:55:21 +000011003 xmlGenericError(xmlGenericErrorContext,
11004 "PP: lookup '%c%c%c' failed\n", first, next, third);
11005#endif
11006 return(-1);
11007}
11008
11009/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011010 * xmlParseGetLasts:
11011 * @ctxt: an XML parser context
11012 * @lastlt: pointer to store the last '<' from the input
11013 * @lastgt: pointer to store the last '>' from the input
11014 *
11015 * Lookup the last < and > in the current chunk
11016 */
11017static void
11018xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11019 const xmlChar **lastgt) {
11020 const xmlChar *tmp;
11021
11022 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11023 xmlGenericError(xmlGenericErrorContext,
11024 "Internal error: xmlParseGetLasts\n");
11025 return;
11026 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011027 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011028 tmp = ctxt->input->end;
11029 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011030 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011031 if (tmp < ctxt->input->base) {
11032 *lastlt = NULL;
11033 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011034 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011035 *lastlt = tmp;
11036 tmp++;
11037 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11038 if (*tmp == '\'') {
11039 tmp++;
11040 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11041 if (tmp < ctxt->input->end) tmp++;
11042 } else if (*tmp == '"') {
11043 tmp++;
11044 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11045 if (tmp < ctxt->input->end) tmp++;
11046 } else
11047 tmp++;
11048 }
11049 if (tmp < ctxt->input->end)
11050 *lastgt = tmp;
11051 else {
11052 tmp = *lastlt;
11053 tmp--;
11054 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11055 if (tmp >= ctxt->input->base)
11056 *lastgt = tmp;
11057 else
11058 *lastgt = NULL;
11059 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011060 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011061 } else {
11062 *lastlt = NULL;
11063 *lastgt = NULL;
11064 }
11065}
11066/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011067 * xmlCheckCdataPush:
11068 * @cur: pointer to the bock of characters
11069 * @len: length of the block in bytes
11070 *
11071 * Check that the block of characters is okay as SCdata content [20]
11072 *
11073 * Returns the number of bytes to pass if okay, a negative index where an
11074 * UTF-8 error occured otherwise
11075 */
11076static int
11077xmlCheckCdataPush(const xmlChar *utf, int len) {
11078 int ix;
11079 unsigned char c;
11080 int codepoint;
11081
11082 if ((utf == NULL) || (len <= 0))
11083 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011084
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011085 for (ix = 0; ix < len;) { /* string is 0-terminated */
11086 c = utf[ix];
11087 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11088 if (c >= 0x20)
11089 ix++;
11090 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11091 ix++;
11092 else
11093 return(-ix);
11094 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11095 if (ix + 2 > len) return(ix);
11096 if ((utf[ix+1] & 0xc0 ) != 0x80)
11097 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011098 codepoint = (utf[ix] & 0x1f) << 6;
11099 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011100 if (!xmlIsCharQ(codepoint))
11101 return(-ix);
11102 ix += 2;
11103 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11104 if (ix + 3 > len) return(ix);
11105 if (((utf[ix+1] & 0xc0) != 0x80) ||
11106 ((utf[ix+2] & 0xc0) != 0x80))
11107 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011108 codepoint = (utf[ix] & 0xf) << 12;
11109 codepoint |= (utf[ix+1] & 0x3f) << 6;
11110 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011111 if (!xmlIsCharQ(codepoint))
11112 return(-ix);
11113 ix += 3;
11114 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11115 if (ix + 4 > len) return(ix);
11116 if (((utf[ix+1] & 0xc0) != 0x80) ||
11117 ((utf[ix+2] & 0xc0) != 0x80) ||
11118 ((utf[ix+3] & 0xc0) != 0x80))
11119 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011120 codepoint = (utf[ix] & 0x7) << 18;
11121 codepoint |= (utf[ix+1] & 0x3f) << 12;
11122 codepoint |= (utf[ix+2] & 0x3f) << 6;
11123 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011124 if (!xmlIsCharQ(codepoint))
11125 return(-ix);
11126 ix += 4;
11127 } else /* unknown encoding */
11128 return(-ix);
11129 }
11130 return(ix);
11131}
11132
11133/**
Owen Taylor3473f882001-02-23 17:55:21 +000011134 * xmlParseTryOrFinish:
11135 * @ctxt: an XML parser context
11136 * @terminate: last chunk indicator
11137 *
11138 * Try to progress on parsing
11139 *
11140 * Returns zero if no parsing was possible
11141 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011142static int
Owen Taylor3473f882001-02-23 17:55:21 +000011143xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11144 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011145 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011146 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011147 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011148
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011149 if (ctxt->input == NULL)
11150 return(0);
11151
Owen Taylor3473f882001-02-23 17:55:21 +000011152#ifdef DEBUG_PUSH
11153 switch (ctxt->instate) {
11154 case XML_PARSER_EOF:
11155 xmlGenericError(xmlGenericErrorContext,
11156 "PP: try EOF\n"); break;
11157 case XML_PARSER_START:
11158 xmlGenericError(xmlGenericErrorContext,
11159 "PP: try START\n"); break;
11160 case XML_PARSER_MISC:
11161 xmlGenericError(xmlGenericErrorContext,
11162 "PP: try MISC\n");break;
11163 case XML_PARSER_COMMENT:
11164 xmlGenericError(xmlGenericErrorContext,
11165 "PP: try COMMENT\n");break;
11166 case XML_PARSER_PROLOG:
11167 xmlGenericError(xmlGenericErrorContext,
11168 "PP: try PROLOG\n");break;
11169 case XML_PARSER_START_TAG:
11170 xmlGenericError(xmlGenericErrorContext,
11171 "PP: try START_TAG\n");break;
11172 case XML_PARSER_CONTENT:
11173 xmlGenericError(xmlGenericErrorContext,
11174 "PP: try CONTENT\n");break;
11175 case XML_PARSER_CDATA_SECTION:
11176 xmlGenericError(xmlGenericErrorContext,
11177 "PP: try CDATA_SECTION\n");break;
11178 case XML_PARSER_END_TAG:
11179 xmlGenericError(xmlGenericErrorContext,
11180 "PP: try END_TAG\n");break;
11181 case XML_PARSER_ENTITY_DECL:
11182 xmlGenericError(xmlGenericErrorContext,
11183 "PP: try ENTITY_DECL\n");break;
11184 case XML_PARSER_ENTITY_VALUE:
11185 xmlGenericError(xmlGenericErrorContext,
11186 "PP: try ENTITY_VALUE\n");break;
11187 case XML_PARSER_ATTRIBUTE_VALUE:
11188 xmlGenericError(xmlGenericErrorContext,
11189 "PP: try ATTRIBUTE_VALUE\n");break;
11190 case XML_PARSER_DTD:
11191 xmlGenericError(xmlGenericErrorContext,
11192 "PP: try DTD\n");break;
11193 case XML_PARSER_EPILOG:
11194 xmlGenericError(xmlGenericErrorContext,
11195 "PP: try EPILOG\n");break;
11196 case XML_PARSER_PI:
11197 xmlGenericError(xmlGenericErrorContext,
11198 "PP: try PI\n");break;
11199 case XML_PARSER_IGNORE:
11200 xmlGenericError(xmlGenericErrorContext,
11201 "PP: try IGNORE\n");break;
11202 }
11203#endif
11204
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011205 if ((ctxt->input != NULL) &&
11206 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011207 xmlSHRINK(ctxt);
11208 ctxt->checkIndex = 0;
11209 }
11210 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011211
Daniel Veillarde50ba812013-04-11 15:54:51 +080011212 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011213 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011214 return(0);
11215
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011216
Owen Taylor3473f882001-02-23 17:55:21 +000011217 /*
11218 * Pop-up of finished entities.
11219 */
11220 while ((RAW == 0) && (ctxt->inputNr > 1))
11221 xmlPopInput(ctxt);
11222
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011223 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011224 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011225 avail = ctxt->input->length -
11226 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011227 else {
11228 /*
11229 * If we are operating on converted input, try to flush
11230 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011231 * buffer. But do not do this in document start where
11232 * encoding="..." may not have been read and we work on a
11233 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011234 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011235 if ((ctxt->instate != XML_PARSER_START) &&
11236 (ctxt->input->buf->raw != NULL) &&
11237 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011238 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11239 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011240 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011241
11242 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011243 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11244 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011245 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011246 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011247 (ctxt->input->cur - ctxt->input->base);
11248 }
Owen Taylor3473f882001-02-23 17:55:21 +000011249 if (avail < 1)
11250 goto done;
11251 switch (ctxt->instate) {
11252 case XML_PARSER_EOF:
11253 /*
11254 * Document parsing is done !
11255 */
11256 goto done;
11257 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011258 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11259 xmlChar start[4];
11260 xmlCharEncoding enc;
11261
11262 /*
11263 * Very first chars read from the document flow.
11264 */
11265 if (avail < 4)
11266 goto done;
11267
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011268 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011269 * Get the 4 first bytes and decode the charset
11270 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011271 * plug some encoding conversion routines,
11272 * else xmlSwitchEncoding will set to (default)
11273 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011274 */
11275 start[0] = RAW;
11276 start[1] = NXT(1);
11277 start[2] = NXT(2);
11278 start[3] = NXT(3);
11279 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011280 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011281 break;
11282 }
Owen Taylor3473f882001-02-23 17:55:21 +000011283
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011284 if (avail < 2)
11285 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011286 cur = ctxt->input->cur[0];
11287 next = ctxt->input->cur[1];
11288 if (cur == 0) {
11289 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11290 ctxt->sax->setDocumentLocator(ctxt->userData,
11291 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011292 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011293 ctxt->instate = XML_PARSER_EOF;
11294#ifdef DEBUG_PUSH
11295 xmlGenericError(xmlGenericErrorContext,
11296 "PP: entering EOF\n");
11297#endif
11298 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11299 ctxt->sax->endDocument(ctxt->userData);
11300 goto done;
11301 }
11302 if ((cur == '<') && (next == '?')) {
11303 /* PI or XML decl */
11304 if (avail < 5) return(ret);
11305 if ((!terminate) &&
11306 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11307 return(ret);
11308 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11309 ctxt->sax->setDocumentLocator(ctxt->userData,
11310 &xmlDefaultSAXLocator);
11311 if ((ctxt->input->cur[2] == 'x') &&
11312 (ctxt->input->cur[3] == 'm') &&
11313 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011314 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011315 ret += 5;
11316#ifdef DEBUG_PUSH
11317 xmlGenericError(xmlGenericErrorContext,
11318 "PP: Parsing XML Decl\n");
11319#endif
11320 xmlParseXMLDecl(ctxt);
11321 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11322 /*
11323 * The XML REC instructs us to stop parsing right
11324 * here
11325 */
11326 ctxt->instate = XML_PARSER_EOF;
11327 return(0);
11328 }
11329 ctxt->standalone = ctxt->input->standalone;
11330 if ((ctxt->encoding == NULL) &&
11331 (ctxt->input->encoding != NULL))
11332 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11333 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11334 (!ctxt->disableSAX))
11335 ctxt->sax->startDocument(ctxt->userData);
11336 ctxt->instate = XML_PARSER_MISC;
11337#ifdef DEBUG_PUSH
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: entering MISC\n");
11340#endif
11341 } else {
11342 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11343 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11344 (!ctxt->disableSAX))
11345 ctxt->sax->startDocument(ctxt->userData);
11346 ctxt->instate = XML_PARSER_MISC;
11347#ifdef DEBUG_PUSH
11348 xmlGenericError(xmlGenericErrorContext,
11349 "PP: entering MISC\n");
11350#endif
11351 }
11352 } else {
11353 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11354 ctxt->sax->setDocumentLocator(ctxt->userData,
11355 &xmlDefaultSAXLocator);
11356 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011357 if (ctxt->version == NULL) {
11358 xmlErrMemory(ctxt, NULL);
11359 break;
11360 }
Owen Taylor3473f882001-02-23 17:55:21 +000011361 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11362 (!ctxt->disableSAX))
11363 ctxt->sax->startDocument(ctxt->userData);
11364 ctxt->instate = XML_PARSER_MISC;
11365#ifdef DEBUG_PUSH
11366 xmlGenericError(xmlGenericErrorContext,
11367 "PP: entering MISC\n");
11368#endif
11369 }
11370 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011371 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011372 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011373 const xmlChar *prefix = NULL;
11374 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011375 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011376
11377 if ((avail < 2) && (ctxt->inputNr == 1))
11378 goto done;
11379 cur = ctxt->input->cur[0];
11380 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011381 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011382 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011383 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11384 ctxt->sax->endDocument(ctxt->userData);
11385 goto done;
11386 }
11387 if (!terminate) {
11388 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011389 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011390 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011391 goto done;
11392 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11393 goto done;
11394 }
11395 }
11396 if (ctxt->spaceNr == 0)
11397 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011398 else if (*ctxt->space == -2)
11399 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011400 else
11401 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011402#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011403 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011404#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011405 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011406#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011407 else
11408 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011409#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011410 if (ctxt->instate == XML_PARSER_EOF)
11411 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011412 if (name == NULL) {
11413 spacePop(ctxt);
11414 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011415 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11416 ctxt->sax->endDocument(ctxt->userData);
11417 goto done;
11418 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011419#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011420 /*
11421 * [ VC: Root Element Type ]
11422 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011423 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011424 */
11425 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11426 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11427 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011428#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011429
11430 /*
11431 * Check for an Empty Element.
11432 */
11433 if ((RAW == '/') && (NXT(1) == '>')) {
11434 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011435
11436 if (ctxt->sax2) {
11437 if ((ctxt->sax != NULL) &&
11438 (ctxt->sax->endElementNs != NULL) &&
11439 (!ctxt->disableSAX))
11440 ctxt->sax->endElementNs(ctxt->userData, name,
11441 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011442 if (ctxt->nsNr - nsNr > 0)
11443 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011444#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011445 } else {
11446 if ((ctxt->sax != NULL) &&
11447 (ctxt->sax->endElement != NULL) &&
11448 (!ctxt->disableSAX))
11449 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011450#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011451 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011452 if (ctxt->instate == XML_PARSER_EOF)
11453 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011454 spacePop(ctxt);
11455 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011456 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011457 } else {
11458 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011459 }
Daniel Veillard65686452012-07-19 18:25:01 +080011460 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011461 break;
11462 }
11463 if (RAW == '>') {
11464 NEXT;
11465 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011466 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011467 "Couldn't find end of Start Tag %s\n",
11468 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011469 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011470 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011471 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011472 if (ctxt->sax2)
11473 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011474#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011475 else
11476 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011477#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011478
Daniel Veillarda880b122003-04-21 21:36:41 +000011479 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011480 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011481 break;
11482 }
11483 case XML_PARSER_CONTENT: {
11484 const xmlChar *test;
11485 unsigned int cons;
11486 if ((avail < 2) && (ctxt->inputNr == 1))
11487 goto done;
11488 cur = ctxt->input->cur[0];
11489 next = ctxt->input->cur[1];
11490
11491 test = CUR_PTR;
11492 cons = ctxt->input->consumed;
11493 if ((cur == '<') && (next == '/')) {
11494 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011495 break;
11496 } else if ((cur == '<') && (next == '?')) {
11497 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011498 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11499 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011500 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011501 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011502 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011503 ctxt->instate = XML_PARSER_CONTENT;
11504 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011505 } else if ((cur == '<') && (next != '!')) {
11506 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011507 break;
11508 } else if ((cur == '<') && (next == '!') &&
11509 (ctxt->input->cur[2] == '-') &&
11510 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011511 int term;
11512
11513 if (avail < 4)
11514 goto done;
11515 ctxt->input->cur += 4;
11516 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11517 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011518 if ((!terminate) && (term < 0)) {
11519 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011520 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011521 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011522 xmlParseComment(ctxt);
11523 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011524 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011525 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11526 (ctxt->input->cur[2] == '[') &&
11527 (ctxt->input->cur[3] == 'C') &&
11528 (ctxt->input->cur[4] == 'D') &&
11529 (ctxt->input->cur[5] == 'A') &&
11530 (ctxt->input->cur[6] == 'T') &&
11531 (ctxt->input->cur[7] == 'A') &&
11532 (ctxt->input->cur[8] == '[')) {
11533 SKIP(9);
11534 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011535 break;
11536 } else if ((cur == '<') && (next == '!') &&
11537 (avail < 9)) {
11538 goto done;
11539 } else if (cur == '&') {
11540 if ((!terminate) &&
11541 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11542 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011543 xmlParseReference(ctxt);
11544 } else {
11545 /* TODO Avoid the extra copy, handle directly !!! */
11546 /*
11547 * Goal of the following test is:
11548 * - minimize calls to the SAX 'character' callback
11549 * when they are mergeable
11550 * - handle an problem for isBlank when we only parse
11551 * a sequence of blank chars and the next one is
11552 * not available to check against '<' presence.
11553 * - tries to homogenize the differences in SAX
11554 * callbacks between the push and pull versions
11555 * of the parser.
11556 */
11557 if ((ctxt->inputNr == 1) &&
11558 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11559 if (!terminate) {
11560 if (ctxt->progressive) {
11561 if ((lastlt == NULL) ||
11562 (ctxt->input->cur > lastlt))
11563 goto done;
11564 } else if (xmlParseLookupSequence(ctxt,
11565 '<', 0, 0) < 0) {
11566 goto done;
11567 }
11568 }
11569 }
11570 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011571 xmlParseCharData(ctxt, 0);
11572 }
11573 /*
11574 * Pop-up of finished entities.
11575 */
11576 while ((RAW == 0) && (ctxt->inputNr > 1))
11577 xmlPopInput(ctxt);
11578 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011579 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11580 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011581 ctxt->instate = XML_PARSER_EOF;
11582 break;
11583 }
11584 break;
11585 }
11586 case XML_PARSER_END_TAG:
11587 if (avail < 2)
11588 goto done;
11589 if (!terminate) {
11590 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011591 /* > can be found unescaped in attribute values */
11592 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011593 goto done;
11594 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11595 goto done;
11596 }
11597 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011598 if (ctxt->sax2) {
11599 xmlParseEndTag2(ctxt,
11600 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11601 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011602 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011603 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011604 }
11605#ifdef LIBXML_SAX1_ENABLED
11606 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011607 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011608#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011609 if (ctxt->instate == XML_PARSER_EOF) {
11610 /* Nothing */
11611 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011612 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011613 } else {
11614 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011615 }
11616 break;
11617 case XML_PARSER_CDATA_SECTION: {
11618 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011619 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011620 * cdataBlock merge back contiguous callbacks.
11621 */
11622 int base;
11623
11624 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11625 if (base < 0) {
11626 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011627 int tmp;
11628
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011629 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011630 XML_PARSER_BIG_BUFFER_SIZE);
11631 if (tmp < 0) {
11632 tmp = -tmp;
11633 ctxt->input->cur += tmp;
11634 goto encoding_error;
11635 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011636 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11637 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011638 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011639 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011640 else if (ctxt->sax->characters != NULL)
11641 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011642 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011643 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011644 if (ctxt->instate == XML_PARSER_EOF)
11645 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011646 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011647 ctxt->checkIndex = 0;
11648 }
11649 goto done;
11650 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011651 int tmp;
11652
11653 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11654 if ((tmp < 0) || (tmp != base)) {
11655 tmp = -tmp;
11656 ctxt->input->cur += tmp;
11657 goto encoding_error;
11658 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011659 if ((ctxt->sax != NULL) && (base == 0) &&
11660 (ctxt->sax->cdataBlock != NULL) &&
11661 (!ctxt->disableSAX)) {
11662 /*
11663 * Special case to provide identical behaviour
11664 * between pull and push parsers on enpty CDATA
11665 * sections
11666 */
11667 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11668 (!strncmp((const char *)&ctxt->input->cur[-9],
11669 "<![CDATA[", 9)))
11670 ctxt->sax->cdataBlock(ctxt->userData,
11671 BAD_CAST "", 0);
11672 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011673 (!ctxt->disableSAX)) {
11674 if (ctxt->sax->cdataBlock != NULL)
11675 ctxt->sax->cdataBlock(ctxt->userData,
11676 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011677 else if (ctxt->sax->characters != NULL)
11678 ctxt->sax->characters(ctxt->userData,
11679 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011680 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011681 if (ctxt->instate == XML_PARSER_EOF)
11682 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011683 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011684 ctxt->checkIndex = 0;
11685 ctxt->instate = XML_PARSER_CONTENT;
11686#ifdef DEBUG_PUSH
11687 xmlGenericError(xmlGenericErrorContext,
11688 "PP: entering CONTENT\n");
11689#endif
11690 }
11691 break;
11692 }
Owen Taylor3473f882001-02-23 17:55:21 +000011693 case XML_PARSER_MISC:
11694 SKIP_BLANKS;
11695 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011696 avail = ctxt->input->length -
11697 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011698 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011699 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011700 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011701 if (avail < 2)
11702 goto done;
11703 cur = ctxt->input->cur[0];
11704 next = ctxt->input->cur[1];
11705 if ((cur == '<') && (next == '?')) {
11706 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011707 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11708 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011709 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011710 }
Owen Taylor3473f882001-02-23 17:55:21 +000011711#ifdef DEBUG_PUSH
11712 xmlGenericError(xmlGenericErrorContext,
11713 "PP: Parsing PI\n");
11714#endif
11715 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011716 if (ctxt->instate == XML_PARSER_EOF)
11717 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011718 ctxt->instate = XML_PARSER_MISC;
11719 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011720 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011721 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011722 (ctxt->input->cur[2] == '-') &&
11723 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011724 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011725 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11726 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011727 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011728 }
Owen Taylor3473f882001-02-23 17:55:21 +000011729#ifdef DEBUG_PUSH
11730 xmlGenericError(xmlGenericErrorContext,
11731 "PP: Parsing Comment\n");
11732#endif
11733 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011734 if (ctxt->instate == XML_PARSER_EOF)
11735 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011736 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011737 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011738 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011739 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011740 (ctxt->input->cur[2] == 'D') &&
11741 (ctxt->input->cur[3] == 'O') &&
11742 (ctxt->input->cur[4] == 'C') &&
11743 (ctxt->input->cur[5] == 'T') &&
11744 (ctxt->input->cur[6] == 'Y') &&
11745 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011746 (ctxt->input->cur[8] == 'E')) {
11747 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011748 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11749 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011750 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011751 }
Owen Taylor3473f882001-02-23 17:55:21 +000011752#ifdef DEBUG_PUSH
11753 xmlGenericError(xmlGenericErrorContext,
11754 "PP: Parsing internal subset\n");
11755#endif
11756 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011757 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011758 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011759 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011760 if (ctxt->instate == XML_PARSER_EOF)
11761 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011762 if (RAW == '[') {
11763 ctxt->instate = XML_PARSER_DTD;
11764#ifdef DEBUG_PUSH
11765 xmlGenericError(xmlGenericErrorContext,
11766 "PP: entering DTD\n");
11767#endif
11768 } else {
11769 /*
11770 * Create and update the external subset.
11771 */
11772 ctxt->inSubset = 2;
11773 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11774 (ctxt->sax->externalSubset != NULL))
11775 ctxt->sax->externalSubset(ctxt->userData,
11776 ctxt->intSubName, ctxt->extSubSystem,
11777 ctxt->extSubURI);
11778 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011779 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011780 ctxt->instate = XML_PARSER_PROLOG;
11781#ifdef DEBUG_PUSH
11782 xmlGenericError(xmlGenericErrorContext,
11783 "PP: entering PROLOG\n");
11784#endif
11785 }
11786 } else if ((cur == '<') && (next == '!') &&
11787 (avail < 9)) {
11788 goto done;
11789 } else {
11790 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011791 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011792 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011793#ifdef DEBUG_PUSH
11794 xmlGenericError(xmlGenericErrorContext,
11795 "PP: entering START_TAG\n");
11796#endif
11797 }
11798 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011799 case XML_PARSER_PROLOG:
11800 SKIP_BLANKS;
11801 if (ctxt->input->buf == NULL)
11802 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11803 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011804 avail = xmlBufUse(ctxt->input->buf->buffer) -
11805 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011806 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011807 goto done;
11808 cur = ctxt->input->cur[0];
11809 next = ctxt->input->cur[1];
11810 if ((cur == '<') && (next == '?')) {
11811 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011812 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11813 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011814 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011815 }
Owen Taylor3473f882001-02-23 17:55:21 +000011816#ifdef DEBUG_PUSH
11817 xmlGenericError(xmlGenericErrorContext,
11818 "PP: Parsing PI\n");
11819#endif
11820 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011821 if (ctxt->instate == XML_PARSER_EOF)
11822 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011823 ctxt->instate = XML_PARSER_PROLOG;
11824 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011825 } else if ((cur == '<') && (next == '!') &&
11826 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11827 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011828 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11829 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011830 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011831 }
Owen Taylor3473f882001-02-23 17:55:21 +000011832#ifdef DEBUG_PUSH
11833 xmlGenericError(xmlGenericErrorContext,
11834 "PP: Parsing Comment\n");
11835#endif
11836 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011837 if (ctxt->instate == XML_PARSER_EOF)
11838 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011839 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011840 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011841 } else if ((cur == '<') && (next == '!') &&
11842 (avail < 4)) {
11843 goto done;
11844 } else {
11845 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011846 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011847 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011848 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011849#ifdef DEBUG_PUSH
11850 xmlGenericError(xmlGenericErrorContext,
11851 "PP: entering START_TAG\n");
11852#endif
11853 }
11854 break;
11855 case XML_PARSER_EPILOG:
11856 SKIP_BLANKS;
11857 if (ctxt->input->buf == NULL)
11858 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11859 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011860 avail = xmlBufUse(ctxt->input->buf->buffer) -
11861 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011862 if (avail < 2)
11863 goto done;
11864 cur = ctxt->input->cur[0];
11865 next = ctxt->input->cur[1];
11866 if ((cur == '<') && (next == '?')) {
11867 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011868 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11869 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011870 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011871 }
Owen Taylor3473f882001-02-23 17:55:21 +000011872#ifdef DEBUG_PUSH
11873 xmlGenericError(xmlGenericErrorContext,
11874 "PP: Parsing PI\n");
11875#endif
11876 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011877 if (ctxt->instate == XML_PARSER_EOF)
11878 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011879 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011880 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011881 } else if ((cur == '<') && (next == '!') &&
11882 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11883 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011884 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11885 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011886 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011887 }
Owen Taylor3473f882001-02-23 17:55:21 +000011888#ifdef DEBUG_PUSH
11889 xmlGenericError(xmlGenericErrorContext,
11890 "PP: Parsing Comment\n");
11891#endif
11892 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011893 if (ctxt->instate == XML_PARSER_EOF)
11894 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011895 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011896 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011897 } else if ((cur == '<') && (next == '!') &&
11898 (avail < 4)) {
11899 goto done;
11900 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011901 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011902 ctxt->instate = XML_PARSER_EOF;
11903#ifdef DEBUG_PUSH
11904 xmlGenericError(xmlGenericErrorContext,
11905 "PP: entering EOF\n");
11906#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011907 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011908 ctxt->sax->endDocument(ctxt->userData);
11909 goto done;
11910 }
11911 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011912 case XML_PARSER_DTD: {
11913 /*
11914 * Sorry but progressive parsing of the internal subset
11915 * is not expected to be supported. We first check that
11916 * the full content of the internal subset is available and
11917 * the parsing is launched only at that point.
11918 * Internal subset ends up with "']' S? '>'" in an unescaped
11919 * section and not in a ']]>' sequence which are conditional
11920 * sections (whoever argued to keep that crap in XML deserve
11921 * a place in hell !).
11922 */
11923 int base, i;
11924 xmlChar *buf;
11925 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011926 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011927
11928 base = ctxt->input->cur - ctxt->input->base;
11929 if (base < 0) return(0);
11930 if (ctxt->checkIndex > base)
11931 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011932 buf = xmlBufContent(ctxt->input->buf->buffer);
11933 use = xmlBufUse(ctxt->input->buf->buffer);
11934 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011935 if (quote != 0) {
11936 if (buf[base] == quote)
11937 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011938 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011939 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011940 if ((quote == 0) && (buf[base] == '<')) {
11941 int found = 0;
11942 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011943 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011944 (buf[base + 1] == '!') &&
11945 (buf[base + 2] == '-') &&
11946 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011947 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011948 if ((buf[base] == '-') &&
11949 (buf[base + 1] == '-') &&
11950 (buf[base + 2] == '>')) {
11951 found = 1;
11952 base += 2;
11953 break;
11954 }
11955 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011956 if (!found) {
11957#if 0
11958 fprintf(stderr, "unfinished comment\n");
11959#endif
11960 break; /* for */
11961 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011962 continue;
11963 }
11964 }
Owen Taylor3473f882001-02-23 17:55:21 +000011965 if (buf[base] == '"') {
11966 quote = '"';
11967 continue;
11968 }
11969 if (buf[base] == '\'') {
11970 quote = '\'';
11971 continue;
11972 }
11973 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011974#if 0
11975 fprintf(stderr, "%c%c%c%c: ", buf[base],
11976 buf[base + 1], buf[base + 2], buf[base + 3]);
11977#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011978 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011979 break;
11980 if (buf[base + 1] == ']') {
11981 /* conditional crap, skip both ']' ! */
11982 base++;
11983 continue;
11984 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011985 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011986 if (buf[base + i] == '>') {
11987#if 0
11988 fprintf(stderr, "found\n");
11989#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011990 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011991 }
11992 if (!IS_BLANK_CH(buf[base + i])) {
11993#if 0
11994 fprintf(stderr, "not found\n");
11995#endif
11996 goto not_end_of_int_subset;
11997 }
Owen Taylor3473f882001-02-23 17:55:21 +000011998 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011999#if 0
12000 fprintf(stderr, "end of stream\n");
12001#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012002 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012003
Owen Taylor3473f882001-02-23 17:55:21 +000012004 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012005not_end_of_int_subset:
12006 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012007 }
12008 /*
12009 * We didn't found the end of the Internal subset
12010 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012011 if (quote == 0)
12012 ctxt->checkIndex = base;
12013 else
12014 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012015#ifdef DEBUG_PUSH
12016 if (next == 0)
12017 xmlGenericError(xmlGenericErrorContext,
12018 "PP: lookup of int subset end filed\n");
12019#endif
12020 goto done;
12021
12022found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012023 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012024 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012025 if (ctxt->instate == XML_PARSER_EOF)
12026 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012027 ctxt->inSubset = 2;
12028 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12029 (ctxt->sax->externalSubset != NULL))
12030 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12031 ctxt->extSubSystem, ctxt->extSubURI);
12032 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012033 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012034 if (ctxt->instate == XML_PARSER_EOF)
12035 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012036 ctxt->instate = XML_PARSER_PROLOG;
12037 ctxt->checkIndex = 0;
12038#ifdef DEBUG_PUSH
12039 xmlGenericError(xmlGenericErrorContext,
12040 "PP: entering PROLOG\n");
12041#endif
12042 break;
12043 }
12044 case XML_PARSER_COMMENT:
12045 xmlGenericError(xmlGenericErrorContext,
12046 "PP: internal error, state == COMMENT\n");
12047 ctxt->instate = XML_PARSER_CONTENT;
12048#ifdef DEBUG_PUSH
12049 xmlGenericError(xmlGenericErrorContext,
12050 "PP: entering CONTENT\n");
12051#endif
12052 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012053 case XML_PARSER_IGNORE:
12054 xmlGenericError(xmlGenericErrorContext,
12055 "PP: internal error, state == IGNORE");
12056 ctxt->instate = XML_PARSER_DTD;
12057#ifdef DEBUG_PUSH
12058 xmlGenericError(xmlGenericErrorContext,
12059 "PP: entering DTD\n");
12060#endif
12061 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012062 case XML_PARSER_PI:
12063 xmlGenericError(xmlGenericErrorContext,
12064 "PP: internal error, state == PI\n");
12065 ctxt->instate = XML_PARSER_CONTENT;
12066#ifdef DEBUG_PUSH
12067 xmlGenericError(xmlGenericErrorContext,
12068 "PP: entering CONTENT\n");
12069#endif
12070 break;
12071 case XML_PARSER_ENTITY_DECL:
12072 xmlGenericError(xmlGenericErrorContext,
12073 "PP: internal error, state == ENTITY_DECL\n");
12074 ctxt->instate = XML_PARSER_DTD;
12075#ifdef DEBUG_PUSH
12076 xmlGenericError(xmlGenericErrorContext,
12077 "PP: entering DTD\n");
12078#endif
12079 break;
12080 case XML_PARSER_ENTITY_VALUE:
12081 xmlGenericError(xmlGenericErrorContext,
12082 "PP: internal error, state == ENTITY_VALUE\n");
12083 ctxt->instate = XML_PARSER_CONTENT;
12084#ifdef DEBUG_PUSH
12085 xmlGenericError(xmlGenericErrorContext,
12086 "PP: entering DTD\n");
12087#endif
12088 break;
12089 case XML_PARSER_ATTRIBUTE_VALUE:
12090 xmlGenericError(xmlGenericErrorContext,
12091 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12092 ctxt->instate = XML_PARSER_START_TAG;
12093#ifdef DEBUG_PUSH
12094 xmlGenericError(xmlGenericErrorContext,
12095 "PP: entering START_TAG\n");
12096#endif
12097 break;
12098 case XML_PARSER_SYSTEM_LITERAL:
12099 xmlGenericError(xmlGenericErrorContext,
12100 "PP: internal error, state == SYSTEM_LITERAL\n");
12101 ctxt->instate = XML_PARSER_START_TAG;
12102#ifdef DEBUG_PUSH
12103 xmlGenericError(xmlGenericErrorContext,
12104 "PP: entering START_TAG\n");
12105#endif
12106 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012107 case XML_PARSER_PUBLIC_LITERAL:
12108 xmlGenericError(xmlGenericErrorContext,
12109 "PP: internal error, state == PUBLIC_LITERAL\n");
12110 ctxt->instate = XML_PARSER_START_TAG;
12111#ifdef DEBUG_PUSH
12112 xmlGenericError(xmlGenericErrorContext,
12113 "PP: entering START_TAG\n");
12114#endif
12115 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012116 }
12117 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012118done:
Owen Taylor3473f882001-02-23 17:55:21 +000012119#ifdef DEBUG_PUSH
12120 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12121#endif
12122 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012123encoding_error:
12124 {
12125 char buffer[150];
12126
12127 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12128 ctxt->input->cur[0], ctxt->input->cur[1],
12129 ctxt->input->cur[2], ctxt->input->cur[3]);
12130 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12131 "Input is not proper UTF-8, indicate encoding !\n%s",
12132 BAD_CAST buffer, NULL);
12133 }
12134 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012135}
12136
12137/**
Daniel Veillard65686452012-07-19 18:25:01 +080012138 * xmlParseCheckTransition:
12139 * @ctxt: an XML parser context
12140 * @chunk: a char array
12141 * @size: the size in byte of the chunk
12142 *
12143 * Check depending on the current parser state if the chunk given must be
12144 * processed immediately or one need more data to advance on parsing.
12145 *
12146 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12147 */
12148static int
12149xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12150 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12151 return(-1);
12152 if (ctxt->instate == XML_PARSER_START_TAG) {
12153 if (memchr(chunk, '>', size) != NULL)
12154 return(1);
12155 return(0);
12156 }
12157 if (ctxt->progressive == XML_PARSER_COMMENT) {
12158 if (memchr(chunk, '>', size) != NULL)
12159 return(1);
12160 return(0);
12161 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012162 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12163 if (memchr(chunk, '>', size) != NULL)
12164 return(1);
12165 return(0);
12166 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012167 if (ctxt->progressive == XML_PARSER_PI) {
12168 if (memchr(chunk, '>', size) != NULL)
12169 return(1);
12170 return(0);
12171 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012172 if (ctxt->instate == XML_PARSER_END_TAG) {
12173 if (memchr(chunk, '>', size) != NULL)
12174 return(1);
12175 return(0);
12176 }
12177 if ((ctxt->progressive == XML_PARSER_DTD) ||
12178 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012179 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012180 return(1);
12181 return(0);
12182 }
Daniel Veillard65686452012-07-19 18:25:01 +080012183 return(1);
12184}
12185
12186/**
Owen Taylor3473f882001-02-23 17:55:21 +000012187 * xmlParseChunk:
12188 * @ctxt: an XML parser context
12189 * @chunk: an char array
12190 * @size: the size in byte of the chunk
12191 * @terminate: last chunk indicator
12192 *
12193 * Parse a Chunk of memory
12194 *
12195 * Returns zero if no error, the xmlParserErrors otherwise.
12196 */
12197int
12198xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12199 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012200 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012201 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012202 size_t old_avail = 0;
12203 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012204
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012205 if (ctxt == NULL)
12206 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012207 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012208 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012209 if (ctxt->instate == XML_PARSER_EOF)
12210 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012211 if (ctxt->instate == XML_PARSER_START)
12212 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012213 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12214 (chunk[size - 1] == '\r')) {
12215 end_in_lf = 1;
12216 size--;
12217 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012218
12219xmldecl_done:
12220
Owen Taylor3473f882001-02-23 17:55:21 +000012221 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12222 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012223 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12224 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012225 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012226
Daniel Veillard65686452012-07-19 18:25:01 +080012227 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012228 /*
12229 * Specific handling if we autodetected an encoding, we should not
12230 * push more than the first line ... which depend on the encoding
12231 * And only push the rest once the final encoding was detected
12232 */
12233 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12234 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012235 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012236
12237 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12238 BAD_CAST "UTF-16")) ||
12239 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12240 BAD_CAST "UTF16")))
12241 len = 90;
12242 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12243 BAD_CAST "UCS-4")) ||
12244 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12245 BAD_CAST "UCS4")))
12246 len = 180;
12247
12248 if (ctxt->input->buf->rawconsumed < len)
12249 len -= ctxt->input->buf->rawconsumed;
12250
Raul Hudeaba9716a2010-03-15 10:13:29 +010012251 /*
12252 * Change size for reading the initial declaration only
12253 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12254 * will blindly copy extra bytes from memory.
12255 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012256 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012257 remain = size - len;
12258 size = len;
12259 } else {
12260 remain = 0;
12261 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012262 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012263 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012264 if (res < 0) {
12265 ctxt->errNo = XML_PARSER_EOF;
12266 ctxt->disableSAX = 1;
12267 return (XML_PARSER_EOF);
12268 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012269 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012270#ifdef DEBUG_PUSH
12271 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12272#endif
12273
Owen Taylor3473f882001-02-23 17:55:21 +000012274 } else if (ctxt->instate != XML_PARSER_EOF) {
12275 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12276 xmlParserInputBufferPtr in = ctxt->input->buf;
12277 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12278 (in->raw != NULL)) {
12279 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012280 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12281 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012282
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012283 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012284 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012285 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012286 xmlGenericError(xmlGenericErrorContext,
12287 "xmlParseChunk: encoder error\n");
12288 return(XML_ERR_INVALID_ENCODING);
12289 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012290 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012291 }
12292 }
12293 }
Daniel Veillard65686452012-07-19 18:25:01 +080012294 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012295 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012296 } else {
12297 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12298 avail = xmlBufUse(ctxt->input->buf->buffer);
12299 /*
12300 * Depending on the current state it may not be such
12301 * a good idea to try parsing if there is nothing in the chunk
12302 * which would be worth doing a parser state transition and we
12303 * need to wait for more data
12304 */
12305 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12306 (old_avail == 0) || (avail == 0) ||
12307 (xmlParseCheckTransition(ctxt,
12308 (const char *)&ctxt->input->base[old_avail],
12309 avail - old_avail)))
12310 xmlParseTryOrFinish(ctxt, terminate);
12311 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012312 if (ctxt->instate == XML_PARSER_EOF)
12313 return(ctxt->errNo);
12314
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012315 if ((ctxt->input != NULL) &&
12316 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12317 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12318 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12319 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12320 ctxt->instate = XML_PARSER_EOF;
12321 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012322 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12323 return(ctxt->errNo);
12324
12325 if (remain != 0) {
12326 chunk += size;
12327 size = remain;
12328 remain = 0;
12329 goto xmldecl_done;
12330 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012331 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12332 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012333 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12334 ctxt->input);
12335 size_t current = ctxt->input->cur - ctxt->input->base;
12336
Daniel Veillarda617e242006-01-09 14:38:44 +000012337 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012338
12339 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12340 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012341 }
Owen Taylor3473f882001-02-23 17:55:21 +000012342 if (terminate) {
12343 /*
12344 * Check for termination
12345 */
Daniel Veillard65686452012-07-19 18:25:01 +080012346 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012347
12348 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012349 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012350 cur_avail = ctxt->input->length -
12351 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012352 else
Daniel Veillard65686452012-07-19 18:25:01 +080012353 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12354 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012355 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012356
Owen Taylor3473f882001-02-23 17:55:21 +000012357 if ((ctxt->instate != XML_PARSER_EOF) &&
12358 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012359 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012360 }
Daniel Veillard65686452012-07-19 18:25:01 +080012361 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012362 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012363 }
Owen Taylor3473f882001-02-23 17:55:21 +000012364 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012365 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012366 ctxt->sax->endDocument(ctxt->userData);
12367 }
12368 ctxt->instate = XML_PARSER_EOF;
12369 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012370 if (ctxt->wellFormed == 0)
12371 return((xmlParserErrors) ctxt->errNo);
12372 else
12373 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012374}
12375
12376/************************************************************************
12377 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012378 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012379 * *
12380 ************************************************************************/
12381
12382/**
Owen Taylor3473f882001-02-23 17:55:21 +000012383 * xmlCreatePushParserCtxt:
12384 * @sax: a SAX handler
12385 * @user_data: The user data returned on SAX callbacks
12386 * @chunk: a pointer to an array of chars
12387 * @size: number of chars in the array
12388 * @filename: an optional file name or URI
12389 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012390 * Create a parser context for using the XML parser in push mode.
12391 * If @buffer and @size are non-NULL, the data is used to detect
12392 * the encoding. The remaining characters will be parsed so they
12393 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012394 * To allow content encoding detection, @size should be >= 4
12395 * The value of @filename is used for fetching external entities
12396 * and error/warning reports.
12397 *
12398 * Returns the new parser context or NULL
12399 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012400
Owen Taylor3473f882001-02-23 17:55:21 +000012401xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012402xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012403 const char *chunk, int size, const char *filename) {
12404 xmlParserCtxtPtr ctxt;
12405 xmlParserInputPtr inputStream;
12406 xmlParserInputBufferPtr buf;
12407 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12408
12409 /*
12410 * plug some encoding conversion routines
12411 */
12412 if ((chunk != NULL) && (size >= 4))
12413 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12414
12415 buf = xmlAllocParserInputBuffer(enc);
12416 if (buf == NULL) return(NULL);
12417
12418 ctxt = xmlNewParserCtxt();
12419 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012420 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012421 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012422 return(NULL);
12423 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012424 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012425 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12426 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012427 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012428 xmlFreeParserInputBuffer(buf);
12429 xmlFreeParserCtxt(ctxt);
12430 return(NULL);
12431 }
Owen Taylor3473f882001-02-23 17:55:21 +000012432 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012433#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012434 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012435#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012436 xmlFree(ctxt->sax);
12437 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12438 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012439 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012440 xmlFreeParserInputBuffer(buf);
12441 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012442 return(NULL);
12443 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012444 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12445 if (sax->initialized == XML_SAX2_MAGIC)
12446 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12447 else
12448 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012449 if (user_data != NULL)
12450 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012451 }
Owen Taylor3473f882001-02-23 17:55:21 +000012452 if (filename == NULL) {
12453 ctxt->directory = NULL;
12454 } else {
12455 ctxt->directory = xmlParserGetDirectory(filename);
12456 }
12457
12458 inputStream = xmlNewInputStream(ctxt);
12459 if (inputStream == NULL) {
12460 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012461 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012462 return(NULL);
12463 }
12464
12465 if (filename == NULL)
12466 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012467 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012468 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012469 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012470 if (inputStream->filename == NULL) {
12471 xmlFreeParserCtxt(ctxt);
12472 xmlFreeParserInputBuffer(buf);
12473 return(NULL);
12474 }
12475 }
Owen Taylor3473f882001-02-23 17:55:21 +000012476 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012477 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012478 inputPush(ctxt, inputStream);
12479
William M. Brack3a1cd212005-02-11 14:35:54 +000012480 /*
12481 * If the caller didn't provide an initial 'chunk' for determining
12482 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12483 * that it can be automatically determined later
12484 */
12485 if ((size == 0) || (chunk == NULL)) {
12486 ctxt->charset = XML_CHAR_ENCODING_NONE;
12487 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012488 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12489 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012490
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012491 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012492
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012493 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012494#ifdef DEBUG_PUSH
12495 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12496#endif
12497 }
12498
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012499 if (enc != XML_CHAR_ENCODING_NONE) {
12500 xmlSwitchEncoding(ctxt, enc);
12501 }
12502
Owen Taylor3473f882001-02-23 17:55:21 +000012503 return(ctxt);
12504}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012505#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012506
12507/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012508 * xmlStopParser:
12509 * @ctxt: an XML parser context
12510 *
12511 * Blocks further parser processing
12512 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012513void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012514xmlStopParser(xmlParserCtxtPtr ctxt) {
12515 if (ctxt == NULL)
12516 return;
12517 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarde50ba812013-04-11 15:54:51 +080012518 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012519 ctxt->disableSAX = 1;
12520 if (ctxt->input != NULL) {
12521 ctxt->input->cur = BAD_CAST"";
12522 ctxt->input->base = ctxt->input->cur;
12523 }
12524}
12525
12526/**
Owen Taylor3473f882001-02-23 17:55:21 +000012527 * xmlCreateIOParserCtxt:
12528 * @sax: a SAX handler
12529 * @user_data: The user data returned on SAX callbacks
12530 * @ioread: an I/O read function
12531 * @ioclose: an I/O close function
12532 * @ioctx: an I/O handler
12533 * @enc: the charset encoding if known
12534 *
12535 * Create a parser context for using the XML parser with an existing
12536 * I/O stream
12537 *
12538 * Returns the new parser context or NULL
12539 */
12540xmlParserCtxtPtr
12541xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12542 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12543 void *ioctx, xmlCharEncoding enc) {
12544 xmlParserCtxtPtr ctxt;
12545 xmlParserInputPtr inputStream;
12546 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012547
Daniel Veillard42595322004-11-08 10:52:06 +000012548 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012549
12550 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012551 if (buf == NULL) {
12552 if (ioclose != NULL)
12553 ioclose(ioctx);
12554 return (NULL);
12555 }
Owen Taylor3473f882001-02-23 17:55:21 +000012556
12557 ctxt = xmlNewParserCtxt();
12558 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012559 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012560 return(NULL);
12561 }
12562 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012563#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012564 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012565#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012566 xmlFree(ctxt->sax);
12567 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12568 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012569 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012570 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012571 return(NULL);
12572 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012573 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12574 if (sax->initialized == XML_SAX2_MAGIC)
12575 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12576 else
12577 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012578 if (user_data != NULL)
12579 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012580 }
Owen Taylor3473f882001-02-23 17:55:21 +000012581
12582 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12583 if (inputStream == NULL) {
12584 xmlFreeParserCtxt(ctxt);
12585 return(NULL);
12586 }
12587 inputPush(ctxt, inputStream);
12588
12589 return(ctxt);
12590}
12591
Daniel Veillard4432df22003-09-28 18:58:27 +000012592#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012593/************************************************************************
12594 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012595 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012596 * *
12597 ************************************************************************/
12598
12599/**
12600 * xmlIOParseDTD:
12601 * @sax: the SAX handler block or NULL
12602 * @input: an Input Buffer
12603 * @enc: the charset encoding if known
12604 *
12605 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012606 *
Owen Taylor3473f882001-02-23 17:55:21 +000012607 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012608 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012609 */
12610
12611xmlDtdPtr
12612xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12613 xmlCharEncoding enc) {
12614 xmlDtdPtr ret = NULL;
12615 xmlParserCtxtPtr ctxt;
12616 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012617 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012618
12619 if (input == NULL)
12620 return(NULL);
12621
12622 ctxt = xmlNewParserCtxt();
12623 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012624 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012625 return(NULL);
12626 }
12627
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012628 /* We are loading a DTD */
12629 ctxt->options |= XML_PARSE_DTDLOAD;
12630
Owen Taylor3473f882001-02-23 17:55:21 +000012631 /*
12632 * Set-up the SAX context
12633 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012634 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012635 if (ctxt->sax != NULL)
12636 xmlFree(ctxt->sax);
12637 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012638 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012639 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012640 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012641
12642 /*
12643 * generate a parser input from the I/O handler
12644 */
12645
Daniel Veillard43caefb2003-12-07 19:32:22 +000012646 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012647 if (pinput == NULL) {
12648 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012649 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012650 xmlFreeParserCtxt(ctxt);
12651 return(NULL);
12652 }
12653
12654 /*
12655 * plug some encoding conversion routines here.
12656 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012657 if (xmlPushInput(ctxt, pinput) < 0) {
12658 if (sax != NULL) ctxt->sax = NULL;
12659 xmlFreeParserCtxt(ctxt);
12660 return(NULL);
12661 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012662 if (enc != XML_CHAR_ENCODING_NONE) {
12663 xmlSwitchEncoding(ctxt, enc);
12664 }
Owen Taylor3473f882001-02-23 17:55:21 +000012665
12666 pinput->filename = NULL;
12667 pinput->line = 1;
12668 pinput->col = 1;
12669 pinput->base = ctxt->input->cur;
12670 pinput->cur = ctxt->input->cur;
12671 pinput->free = NULL;
12672
12673 /*
12674 * let's parse that entity knowing it's an external subset.
12675 */
12676 ctxt->inSubset = 2;
12677 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012678 if (ctxt->myDoc == NULL) {
12679 xmlErrMemory(ctxt, "New Doc failed");
12680 return(NULL);
12681 }
12682 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012683 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12684 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012685
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012686 if ((enc == XML_CHAR_ENCODING_NONE) &&
12687 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012688 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012689 * Get the 4 first bytes and decode the charset
12690 * if enc != XML_CHAR_ENCODING_NONE
12691 * plug some encoding conversion routines.
12692 */
12693 start[0] = RAW;
12694 start[1] = NXT(1);
12695 start[2] = NXT(2);
12696 start[3] = NXT(3);
12697 enc = xmlDetectCharEncoding(start, 4);
12698 if (enc != XML_CHAR_ENCODING_NONE) {
12699 xmlSwitchEncoding(ctxt, enc);
12700 }
12701 }
12702
Owen Taylor3473f882001-02-23 17:55:21 +000012703 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12704
12705 if (ctxt->myDoc != NULL) {
12706 if (ctxt->wellFormed) {
12707 ret = ctxt->myDoc->extSubset;
12708 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012709 if (ret != NULL) {
12710 xmlNodePtr tmp;
12711
12712 ret->doc = NULL;
12713 tmp = ret->children;
12714 while (tmp != NULL) {
12715 tmp->doc = NULL;
12716 tmp = tmp->next;
12717 }
12718 }
Owen Taylor3473f882001-02-23 17:55:21 +000012719 } else {
12720 ret = NULL;
12721 }
12722 xmlFreeDoc(ctxt->myDoc);
12723 ctxt->myDoc = NULL;
12724 }
12725 if (sax != NULL) ctxt->sax = NULL;
12726 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012727
Owen Taylor3473f882001-02-23 17:55:21 +000012728 return(ret);
12729}
12730
12731/**
12732 * xmlSAXParseDTD:
12733 * @sax: the SAX handler block
12734 * @ExternalID: a NAME* containing the External ID of the DTD
12735 * @SystemID: a NAME* containing the URL to the DTD
12736 *
12737 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012738 *
Owen Taylor3473f882001-02-23 17:55:21 +000012739 * Returns the resulting xmlDtdPtr or NULL in case of error.
12740 */
12741
12742xmlDtdPtr
12743xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12744 const xmlChar *SystemID) {
12745 xmlDtdPtr ret = NULL;
12746 xmlParserCtxtPtr ctxt;
12747 xmlParserInputPtr input = NULL;
12748 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012749 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012750
12751 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12752
12753 ctxt = xmlNewParserCtxt();
12754 if (ctxt == NULL) {
12755 return(NULL);
12756 }
12757
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012758 /* We are loading a DTD */
12759 ctxt->options |= XML_PARSE_DTDLOAD;
12760
Owen Taylor3473f882001-02-23 17:55:21 +000012761 /*
12762 * Set-up the SAX context
12763 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012764 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012765 if (ctxt->sax != NULL)
12766 xmlFree(ctxt->sax);
12767 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012768 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012769 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012770
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012771 /*
12772 * Canonicalise the system ID
12773 */
12774 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012775 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012776 xmlFreeParserCtxt(ctxt);
12777 return(NULL);
12778 }
Owen Taylor3473f882001-02-23 17:55:21 +000012779
12780 /*
12781 * Ask the Entity resolver to load the damn thing
12782 */
12783
12784 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012785 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12786 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012787 if (input == NULL) {
12788 if (sax != NULL) ctxt->sax = NULL;
12789 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012790 if (systemIdCanonic != NULL)
12791 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012792 return(NULL);
12793 }
12794
12795 /*
12796 * plug some encoding conversion routines here.
12797 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012798 if (xmlPushInput(ctxt, input) < 0) {
12799 if (sax != NULL) ctxt->sax = NULL;
12800 xmlFreeParserCtxt(ctxt);
12801 if (systemIdCanonic != NULL)
12802 xmlFree(systemIdCanonic);
12803 return(NULL);
12804 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012805 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12806 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12807 xmlSwitchEncoding(ctxt, enc);
12808 }
Owen Taylor3473f882001-02-23 17:55:21 +000012809
12810 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012811 input->filename = (char *) systemIdCanonic;
12812 else
12813 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012814 input->line = 1;
12815 input->col = 1;
12816 input->base = ctxt->input->cur;
12817 input->cur = ctxt->input->cur;
12818 input->free = NULL;
12819
12820 /*
12821 * let's parse that entity knowing it's an external subset.
12822 */
12823 ctxt->inSubset = 2;
12824 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012825 if (ctxt->myDoc == NULL) {
12826 xmlErrMemory(ctxt, "New Doc failed");
12827 if (sax != NULL) ctxt->sax = NULL;
12828 xmlFreeParserCtxt(ctxt);
12829 return(NULL);
12830 }
12831 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012832 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12833 ExternalID, SystemID);
12834 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12835
12836 if (ctxt->myDoc != NULL) {
12837 if (ctxt->wellFormed) {
12838 ret = ctxt->myDoc->extSubset;
12839 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012840 if (ret != NULL) {
12841 xmlNodePtr tmp;
12842
12843 ret->doc = NULL;
12844 tmp = ret->children;
12845 while (tmp != NULL) {
12846 tmp->doc = NULL;
12847 tmp = tmp->next;
12848 }
12849 }
Owen Taylor3473f882001-02-23 17:55:21 +000012850 } else {
12851 ret = NULL;
12852 }
12853 xmlFreeDoc(ctxt->myDoc);
12854 ctxt->myDoc = NULL;
12855 }
12856 if (sax != NULL) ctxt->sax = NULL;
12857 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012858
Owen Taylor3473f882001-02-23 17:55:21 +000012859 return(ret);
12860}
12861
Daniel Veillard4432df22003-09-28 18:58:27 +000012862
Owen Taylor3473f882001-02-23 17:55:21 +000012863/**
12864 * xmlParseDTD:
12865 * @ExternalID: a NAME* containing the External ID of the DTD
12866 * @SystemID: a NAME* containing the URL to the DTD
12867 *
12868 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012869 *
Owen Taylor3473f882001-02-23 17:55:21 +000012870 * Returns the resulting xmlDtdPtr or NULL in case of error.
12871 */
12872
12873xmlDtdPtr
12874xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12875 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12876}
Daniel Veillard4432df22003-09-28 18:58:27 +000012877#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012878
12879/************************************************************************
12880 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012881 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012882 * *
12883 ************************************************************************/
12884
12885/**
Owen Taylor3473f882001-02-23 17:55:21 +000012886 * xmlParseCtxtExternalEntity:
12887 * @ctx: the existing parsing context
12888 * @URL: the URL for the entity to load
12889 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012890 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012891 *
12892 * Parse an external general entity within an existing parsing context
12893 * An external general parsed entity is well-formed if it matches the
12894 * production labeled extParsedEnt.
12895 *
12896 * [78] extParsedEnt ::= TextDecl? content
12897 *
12898 * Returns 0 if the entity is well formed, -1 in case of args problem and
12899 * the parser error code otherwise
12900 */
12901
12902int
12903xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012904 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012905 xmlParserCtxtPtr ctxt;
12906 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012907 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012908 xmlSAXHandlerPtr oldsax = NULL;
12909 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012910 xmlChar start[4];
12911 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012912
Daniel Veillardce682bc2004-11-05 17:22:25 +000012913 if (ctx == NULL) return(-1);
12914
Daniel Veillard0161e632008-08-28 15:36:32 +000012915 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12916 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012917 return(XML_ERR_ENTITY_LOOP);
12918 }
12919
Daniel Veillardcda96922001-08-21 10:56:31 +000012920 if (lst != NULL)
12921 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012922 if ((URL == NULL) && (ID == NULL))
12923 return(-1);
12924 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12925 return(-1);
12926
Rob Richards798743a2009-06-19 13:54:25 -040012927 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012928 if (ctxt == NULL) {
12929 return(-1);
12930 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012931
Owen Taylor3473f882001-02-23 17:55:21 +000012932 oldsax = ctxt->sax;
12933 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012934 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012935 newDoc = xmlNewDoc(BAD_CAST "1.0");
12936 if (newDoc == NULL) {
12937 xmlFreeParserCtxt(ctxt);
12938 return(-1);
12939 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012940 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012941 if (ctx->myDoc->dict) {
12942 newDoc->dict = ctx->myDoc->dict;
12943 xmlDictReference(newDoc->dict);
12944 }
Owen Taylor3473f882001-02-23 17:55:21 +000012945 if (ctx->myDoc != NULL) {
12946 newDoc->intSubset = ctx->myDoc->intSubset;
12947 newDoc->extSubset = ctx->myDoc->extSubset;
12948 }
12949 if (ctx->myDoc->URL != NULL) {
12950 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12951 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012952 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12953 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012954 ctxt->sax = oldsax;
12955 xmlFreeParserCtxt(ctxt);
12956 newDoc->intSubset = NULL;
12957 newDoc->extSubset = NULL;
12958 xmlFreeDoc(newDoc);
12959 return(-1);
12960 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012961 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012962 nodePush(ctxt, newDoc->children);
12963 if (ctx->myDoc == NULL) {
12964 ctxt->myDoc = newDoc;
12965 } else {
12966 ctxt->myDoc = ctx->myDoc;
12967 newDoc->children->doc = ctx->myDoc;
12968 }
12969
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012970 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012971 * Get the 4 first bytes and decode the charset
12972 * if enc != XML_CHAR_ENCODING_NONE
12973 * plug some encoding conversion routines.
12974 */
12975 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012976 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12977 start[0] = RAW;
12978 start[1] = NXT(1);
12979 start[2] = NXT(2);
12980 start[3] = NXT(3);
12981 enc = xmlDetectCharEncoding(start, 4);
12982 if (enc != XML_CHAR_ENCODING_NONE) {
12983 xmlSwitchEncoding(ctxt, enc);
12984 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012985 }
12986
Owen Taylor3473f882001-02-23 17:55:21 +000012987 /*
12988 * Parse a possible text declaration first
12989 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012990 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012991 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012992 /*
12993 * An XML-1.0 document can't reference an entity not XML-1.0
12994 */
12995 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12996 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012997 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012998 "Version mismatch between document and entity\n");
12999 }
Owen Taylor3473f882001-02-23 17:55:21 +000013000 }
13001
13002 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013003 * If the user provided its own SAX callbacks then reuse the
13004 * useData callback field, otherwise the expected setup in a
13005 * DOM builder is to have userData == ctxt
13006 */
13007 if (ctx->userData == ctx)
13008 ctxt->userData = ctxt;
13009 else
13010 ctxt->userData = ctx->userData;
13011
13012 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013013 * Doing validity checking on chunk doesn't make sense
13014 */
13015 ctxt->instate = XML_PARSER_CONTENT;
13016 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013017 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013018 ctxt->loadsubset = ctx->loadsubset;
13019 ctxt->depth = ctx->depth + 1;
13020 ctxt->replaceEntities = ctx->replaceEntities;
13021 if (ctxt->validate) {
13022 ctxt->vctxt.error = ctx->vctxt.error;
13023 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013024 } else {
13025 ctxt->vctxt.error = NULL;
13026 ctxt->vctxt.warning = NULL;
13027 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013028 ctxt->vctxt.nodeTab = NULL;
13029 ctxt->vctxt.nodeNr = 0;
13030 ctxt->vctxt.nodeMax = 0;
13031 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013032 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13033 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013034 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13035 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13036 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013037 ctxt->dictNames = ctx->dictNames;
13038 ctxt->attsDefault = ctx->attsDefault;
13039 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013040 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013041
13042 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013043
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013044 ctx->validate = ctxt->validate;
13045 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013046 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013047 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013048 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013049 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013050 }
13051 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013052 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013053 }
13054
13055 if (!ctxt->wellFormed) {
13056 if (ctxt->errNo == 0)
13057 ret = 1;
13058 else
13059 ret = ctxt->errNo;
13060 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013061 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013062 xmlNodePtr cur;
13063
13064 /*
13065 * Return the newly created nodeset after unlinking it from
13066 * they pseudo parent.
13067 */
13068 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013069 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013070 while (cur != NULL) {
13071 cur->parent = NULL;
13072 cur = cur->next;
13073 }
13074 newDoc->children->children = NULL;
13075 }
13076 ret = 0;
13077 }
13078 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013079 ctxt->dict = NULL;
13080 ctxt->attsDefault = NULL;
13081 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013082 xmlFreeParserCtxt(ctxt);
13083 newDoc->intSubset = NULL;
13084 newDoc->extSubset = NULL;
13085 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013086
Owen Taylor3473f882001-02-23 17:55:21 +000013087 return(ret);
13088}
13089
13090/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013091 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013092 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013093 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013094 * @sax: the SAX handler bloc (possibly NULL)
13095 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13096 * @depth: Used for loop detection, use 0
13097 * @URL: the URL for the entity to load
13098 * @ID: the System ID for the entity to load
13099 * @list: the return value for the set of parsed nodes
13100 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013101 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013102 *
13103 * Returns 0 if the entity is well formed, -1 in case of args problem and
13104 * the parser error code otherwise
13105 */
13106
Daniel Veillard7d515752003-09-26 19:12:37 +000013107static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013108xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13109 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013110 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013111 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013112 xmlParserCtxtPtr ctxt;
13113 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013114 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013115 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013116 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013117 xmlChar start[4];
13118 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013119
Daniel Veillard0161e632008-08-28 15:36:32 +000013120 if (((depth > 40) &&
13121 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13122 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013123 return(XML_ERR_ENTITY_LOOP);
13124 }
13125
Owen Taylor3473f882001-02-23 17:55:21 +000013126 if (list != NULL)
13127 *list = NULL;
13128 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013129 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013130 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013131 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013132
13133
Rob Richards9c0aa472009-03-26 18:10:19 +000013134 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013135 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013136 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013137 if (oldctxt != NULL) {
13138 ctxt->_private = oldctxt->_private;
13139 ctxt->loadsubset = oldctxt->loadsubset;
13140 ctxt->validate = oldctxt->validate;
13141 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013142 ctxt->record_info = oldctxt->record_info;
13143 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13144 ctxt->node_seq.length = oldctxt->node_seq.length;
13145 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013146 } else {
13147 /*
13148 * Doing validity checking on chunk without context
13149 * doesn't make sense
13150 */
13151 ctxt->_private = NULL;
13152 ctxt->validate = 0;
13153 ctxt->external = 2;
13154 ctxt->loadsubset = 0;
13155 }
Owen Taylor3473f882001-02-23 17:55:21 +000013156 if (sax != NULL) {
13157 oldsax = ctxt->sax;
13158 ctxt->sax = sax;
13159 if (user_data != NULL)
13160 ctxt->userData = user_data;
13161 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013162 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013163 newDoc = xmlNewDoc(BAD_CAST "1.0");
13164 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013165 ctxt->node_seq.maximum = 0;
13166 ctxt->node_seq.length = 0;
13167 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013168 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013169 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013170 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013171 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013172 newDoc->intSubset = doc->intSubset;
13173 newDoc->extSubset = doc->extSubset;
13174 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013175 xmlDictReference(newDoc->dict);
13176
Owen Taylor3473f882001-02-23 17:55:21 +000013177 if (doc->URL != NULL) {
13178 newDoc->URL = xmlStrdup(doc->URL);
13179 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013180 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13181 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013182 if (sax != NULL)
13183 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013184 ctxt->node_seq.maximum = 0;
13185 ctxt->node_seq.length = 0;
13186 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013187 xmlFreeParserCtxt(ctxt);
13188 newDoc->intSubset = NULL;
13189 newDoc->extSubset = NULL;
13190 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013191 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013192 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013193 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013194 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013195 ctxt->myDoc = doc;
13196 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013197
Daniel Veillard0161e632008-08-28 15:36:32 +000013198 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013199 * Get the 4 first bytes and decode the charset
13200 * if enc != XML_CHAR_ENCODING_NONE
13201 * plug some encoding conversion routines.
13202 */
13203 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013204 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13205 start[0] = RAW;
13206 start[1] = NXT(1);
13207 start[2] = NXT(2);
13208 start[3] = NXT(3);
13209 enc = xmlDetectCharEncoding(start, 4);
13210 if (enc != XML_CHAR_ENCODING_NONE) {
13211 xmlSwitchEncoding(ctxt, enc);
13212 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013213 }
13214
Owen Taylor3473f882001-02-23 17:55:21 +000013215 /*
13216 * Parse a possible text declaration first
13217 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013218 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013219 xmlParseTextDecl(ctxt);
13220 }
13221
Owen Taylor3473f882001-02-23 17:55:21 +000013222 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013223 ctxt->depth = depth;
13224
13225 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013226
Daniel Veillard561b7f82002-03-20 21:55:57 +000013227 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013228 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013229 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013230 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013231 }
13232 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013233 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013234 }
13235
13236 if (!ctxt->wellFormed) {
13237 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013238 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013239 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013240 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013241 } else {
13242 if (list != NULL) {
13243 xmlNodePtr cur;
13244
13245 /*
13246 * Return the newly created nodeset after unlinking it from
13247 * they pseudo parent.
13248 */
13249 cur = newDoc->children->children;
13250 *list = cur;
13251 while (cur != NULL) {
13252 cur->parent = NULL;
13253 cur = cur->next;
13254 }
13255 newDoc->children->children = NULL;
13256 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013257 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013258 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013259
13260 /*
13261 * Record in the parent context the number of entities replacement
13262 * done when parsing that reference.
13263 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013264 if (oldctxt != NULL)
13265 oldctxt->nbentities += ctxt->nbentities;
13266
Daniel Veillard0161e632008-08-28 15:36:32 +000013267 /*
13268 * Also record the size of the entity parsed
13269 */
13270 if (ctxt->input != NULL) {
13271 oldctxt->sizeentities += ctxt->input->consumed;
13272 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13273 }
13274 /*
13275 * And record the last error if any
13276 */
13277 if (ctxt->lastError.code != XML_ERR_OK)
13278 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13279
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013280 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013281 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013282 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13283 oldctxt->node_seq.length = ctxt->node_seq.length;
13284 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013285 ctxt->node_seq.maximum = 0;
13286 ctxt->node_seq.length = 0;
13287 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013288 xmlFreeParserCtxt(ctxt);
13289 newDoc->intSubset = NULL;
13290 newDoc->extSubset = NULL;
13291 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013292
Owen Taylor3473f882001-02-23 17:55:21 +000013293 return(ret);
13294}
13295
Daniel Veillard81273902003-09-30 00:43:48 +000013296#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013297/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013298 * xmlParseExternalEntity:
13299 * @doc: the document the chunk pertains to
13300 * @sax: the SAX handler bloc (possibly NULL)
13301 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13302 * @depth: Used for loop detection, use 0
13303 * @URL: the URL for the entity to load
13304 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013305 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013306 *
13307 * Parse an external general entity
13308 * An external general parsed entity is well-formed if it matches the
13309 * production labeled extParsedEnt.
13310 *
13311 * [78] extParsedEnt ::= TextDecl? content
13312 *
13313 * Returns 0 if the entity is well formed, -1 in case of args problem and
13314 * the parser error code otherwise
13315 */
13316
13317int
13318xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013319 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013320 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013321 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013322}
13323
13324/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013325 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013326 * @doc: the document the chunk pertains to
13327 * @sax: the SAX handler bloc (possibly NULL)
13328 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13329 * @depth: Used for loop detection, use 0
13330 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013331 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013332 *
13333 * Parse a well-balanced chunk of an XML document
13334 * called by the parser
13335 * The allowed sequence for the Well Balanced Chunk is the one defined by
13336 * the content production in the XML grammar:
13337 *
13338 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13339 *
13340 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13341 * the parser error code otherwise
13342 */
13343
13344int
13345xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013346 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013347 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13348 depth, string, lst, 0 );
13349}
Daniel Veillard81273902003-09-30 00:43:48 +000013350#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013351
13352/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013353 * xmlParseBalancedChunkMemoryInternal:
13354 * @oldctxt: the existing parsing context
13355 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13356 * @user_data: the user data field for the parser context
13357 * @lst: the return value for the set of parsed nodes
13358 *
13359 *
13360 * Parse a well-balanced chunk of an XML document
13361 * called by the parser
13362 * The allowed sequence for the Well Balanced Chunk is the one defined by
13363 * the content production in the XML grammar:
13364 *
13365 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13366 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013367 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13368 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013369 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013370 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013371 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013372 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013373static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013374xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13375 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13376 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013377 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013378 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013379 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013380 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013381 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013382 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013383 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013384#ifdef SAX2
13385 int i;
13386#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013387
Daniel Veillard0161e632008-08-28 15:36:32 +000013388 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13389 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013390 return(XML_ERR_ENTITY_LOOP);
13391 }
13392
13393
13394 if (lst != NULL)
13395 *lst = NULL;
13396 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013397 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013398
13399 size = xmlStrlen(string);
13400
13401 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013402 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013403 if (user_data != NULL)
13404 ctxt->userData = user_data;
13405 else
13406 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013407 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13408 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013409 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13410 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13411 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013412
Daniel Veillard74eaec12009-08-26 15:57:20 +020013413#ifdef SAX2
13414 /* propagate namespaces down the entity */
13415 for (i = 0;i < oldctxt->nsNr;i += 2) {
13416 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13417 }
13418#endif
13419
Daniel Veillard328f48c2002-11-15 15:24:34 +000013420 oldsax = ctxt->sax;
13421 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013422 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013423 ctxt->replaceEntities = oldctxt->replaceEntities;
13424 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013425
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013426 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013427 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013428 newDoc = xmlNewDoc(BAD_CAST "1.0");
13429 if (newDoc == NULL) {
13430 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013431 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013432 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013433 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013434 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013435 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013436 newDoc->dict = ctxt->dict;
13437 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013438 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013439 } else {
13440 ctxt->myDoc = oldctxt->myDoc;
13441 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013442 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013443 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013444 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13445 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013446 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013447 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013448 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013449 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013450 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013451 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013452 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013453 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013454 ctxt->myDoc->children = NULL;
13455 ctxt->myDoc->last = NULL;
13456 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013457 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013458 ctxt->instate = XML_PARSER_CONTENT;
13459 ctxt->depth = oldctxt->depth + 1;
13460
Daniel Veillard328f48c2002-11-15 15:24:34 +000013461 ctxt->validate = 0;
13462 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013463 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13464 /*
13465 * ID/IDREF registration will be done in xmlValidateElement below
13466 */
13467 ctxt->loadsubset |= XML_SKIP_IDS;
13468 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013469 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013470 ctxt->attsDefault = oldctxt->attsDefault;
13471 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013472
Daniel Veillard68e9e742002-11-16 15:35:11 +000013473 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013474 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013475 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013476 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013477 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013478 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013479 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013480 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013481 }
13482
13483 if (!ctxt->wellFormed) {
13484 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013485 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013486 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013487 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013488 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013489 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013490 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013491
William M. Brack7b9154b2003-09-27 19:23:50 +000013492 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013493 xmlNodePtr cur;
13494
13495 /*
13496 * Return the newly created nodeset after unlinking it from
13497 * they pseudo parent.
13498 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013499 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013500 *lst = cur;
13501 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013502#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013503 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13504 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13505 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013506 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13507 oldctxt->myDoc, cur);
13508 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013509#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013510 cur->parent = NULL;
13511 cur = cur->next;
13512 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013513 ctxt->myDoc->children->children = NULL;
13514 }
13515 if (ctxt->myDoc != NULL) {
13516 xmlFreeNode(ctxt->myDoc->children);
13517 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013518 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013519 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013520
13521 /*
13522 * Record in the parent context the number of entities replacement
13523 * done when parsing that reference.
13524 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013525 if (oldctxt != NULL)
13526 oldctxt->nbentities += ctxt->nbentities;
13527
Daniel Veillard0161e632008-08-28 15:36:32 +000013528 /*
13529 * Also record the last error if any
13530 */
13531 if (ctxt->lastError.code != XML_ERR_OK)
13532 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13533
Daniel Veillard328f48c2002-11-15 15:24:34 +000013534 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013535 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013536 ctxt->attsDefault = NULL;
13537 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013538 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013539 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013540 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013541 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013542
Daniel Veillard328f48c2002-11-15 15:24:34 +000013543 return(ret);
13544}
13545
Daniel Veillard29b17482004-08-16 00:39:03 +000013546/**
13547 * xmlParseInNodeContext:
13548 * @node: the context node
13549 * @data: the input string
13550 * @datalen: the input string length in bytes
13551 * @options: a combination of xmlParserOption
13552 * @lst: the return value for the set of parsed nodes
13553 *
13554 * Parse a well-balanced chunk of an XML document
13555 * within the context (DTD, namespaces, etc ...) of the given node.
13556 *
13557 * The allowed sequence for the data is a Well Balanced Chunk defined by
13558 * the content production in the XML grammar:
13559 *
13560 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13561 *
13562 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13563 * error code otherwise
13564 */
13565xmlParserErrors
13566xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13567 int options, xmlNodePtr *lst) {
13568#ifdef SAX2
13569 xmlParserCtxtPtr ctxt;
13570 xmlDocPtr doc = NULL;
13571 xmlNodePtr fake, cur;
13572 int nsnr = 0;
13573
13574 xmlParserErrors ret = XML_ERR_OK;
13575
13576 /*
13577 * check all input parameters, grab the document
13578 */
13579 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13580 return(XML_ERR_INTERNAL_ERROR);
13581 switch (node->type) {
13582 case XML_ELEMENT_NODE:
13583 case XML_ATTRIBUTE_NODE:
13584 case XML_TEXT_NODE:
13585 case XML_CDATA_SECTION_NODE:
13586 case XML_ENTITY_REF_NODE:
13587 case XML_PI_NODE:
13588 case XML_COMMENT_NODE:
13589 case XML_DOCUMENT_NODE:
13590 case XML_HTML_DOCUMENT_NODE:
13591 break;
13592 default:
13593 return(XML_ERR_INTERNAL_ERROR);
13594
13595 }
13596 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13597 (node->type != XML_DOCUMENT_NODE) &&
13598 (node->type != XML_HTML_DOCUMENT_NODE))
13599 node = node->parent;
13600 if (node == NULL)
13601 return(XML_ERR_INTERNAL_ERROR);
13602 if (node->type == XML_ELEMENT_NODE)
13603 doc = node->doc;
13604 else
13605 doc = (xmlDocPtr) node;
13606 if (doc == NULL)
13607 return(XML_ERR_INTERNAL_ERROR);
13608
13609 /*
13610 * allocate a context and set-up everything not related to the
13611 * node position in the tree
13612 */
13613 if (doc->type == XML_DOCUMENT_NODE)
13614 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13615#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013616 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013617 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013618 /*
13619 * When parsing in context, it makes no sense to add implied
13620 * elements like html/body/etc...
13621 */
13622 options |= HTML_PARSE_NOIMPLIED;
13623 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013624#endif
13625 else
13626 return(XML_ERR_INTERNAL_ERROR);
13627
13628 if (ctxt == NULL)
13629 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013630
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013631 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013632 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13633 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13634 * we must wait until the last moment to free the original one.
13635 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013636 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013637 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013638 xmlDictFree(ctxt->dict);
13639 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013640 } else
13641 options |= XML_PARSE_NODICT;
13642
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013643 if (doc->encoding != NULL) {
13644 xmlCharEncodingHandlerPtr hdlr;
13645
13646 if (ctxt->encoding != NULL)
13647 xmlFree((xmlChar *) ctxt->encoding);
13648 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13649
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013650 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013651 if (hdlr != NULL) {
13652 xmlSwitchToEncoding(ctxt, hdlr);
13653 } else {
13654 return(XML_ERR_UNSUPPORTED_ENCODING);
13655 }
13656 }
13657
Daniel Veillard37334572008-07-31 08:20:02 +000013658 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013659 xmlDetectSAX2(ctxt);
13660 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013661 /* parsing in context, i.e. as within existing content */
13662 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013663
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013664 fake = xmlNewComment(NULL);
13665 if (fake == NULL) {
13666 xmlFreeParserCtxt(ctxt);
13667 return(XML_ERR_NO_MEMORY);
13668 }
13669 xmlAddChild(node, fake);
13670
Daniel Veillard29b17482004-08-16 00:39:03 +000013671 if (node->type == XML_ELEMENT_NODE) {
13672 nodePush(ctxt, node);
13673 /*
13674 * initialize the SAX2 namespaces stack
13675 */
13676 cur = node;
13677 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13678 xmlNsPtr ns = cur->nsDef;
13679 const xmlChar *iprefix, *ihref;
13680
13681 while (ns != NULL) {
13682 if (ctxt->dict) {
13683 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13684 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13685 } else {
13686 iprefix = ns->prefix;
13687 ihref = ns->href;
13688 }
13689
13690 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13691 nsPush(ctxt, iprefix, ihref);
13692 nsnr++;
13693 }
13694 ns = ns->next;
13695 }
13696 cur = cur->parent;
13697 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013698 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013699
13700 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13701 /*
13702 * ID/IDREF registration will be done in xmlValidateElement below
13703 */
13704 ctxt->loadsubset |= XML_SKIP_IDS;
13705 }
13706
Daniel Veillard499cc922006-01-18 17:22:35 +000013707#ifdef LIBXML_HTML_ENABLED
13708 if (doc->type == XML_HTML_DOCUMENT_NODE)
13709 __htmlParseContent(ctxt);
13710 else
13711#endif
13712 xmlParseContent(ctxt);
13713
Daniel Veillard29b17482004-08-16 00:39:03 +000013714 nsPop(ctxt, nsnr);
13715 if ((RAW == '<') && (NXT(1) == '/')) {
13716 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13717 } else if (RAW != 0) {
13718 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13719 }
13720 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13721 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13722 ctxt->wellFormed = 0;
13723 }
13724
13725 if (!ctxt->wellFormed) {
13726 if (ctxt->errNo == 0)
13727 ret = XML_ERR_INTERNAL_ERROR;
13728 else
13729 ret = (xmlParserErrors)ctxt->errNo;
13730 } else {
13731 ret = XML_ERR_OK;
13732 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013733
Daniel Veillard29b17482004-08-16 00:39:03 +000013734 /*
13735 * Return the newly created nodeset after unlinking it from
13736 * the pseudo sibling.
13737 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013738
Daniel Veillard29b17482004-08-16 00:39:03 +000013739 cur = fake->next;
13740 fake->next = NULL;
13741 node->last = fake;
13742
13743 if (cur != NULL) {
13744 cur->prev = NULL;
13745 }
13746
13747 *lst = cur;
13748
13749 while (cur != NULL) {
13750 cur->parent = NULL;
13751 cur = cur->next;
13752 }
13753
13754 xmlUnlinkNode(fake);
13755 xmlFreeNode(fake);
13756
13757
13758 if (ret != XML_ERR_OK) {
13759 xmlFreeNodeList(*lst);
13760 *lst = NULL;
13761 }
William M. Brackc3f81342004-10-03 01:22:44 +000013762
William M. Brackb7b54de2004-10-06 16:38:01 +000013763 if (doc->dict != NULL)
13764 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013765 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013766
Daniel Veillard29b17482004-08-16 00:39:03 +000013767 return(ret);
13768#else /* !SAX2 */
13769 return(XML_ERR_INTERNAL_ERROR);
13770#endif
13771}
13772
Daniel Veillard81273902003-09-30 00:43:48 +000013773#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013774/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013775 * xmlParseBalancedChunkMemoryRecover:
13776 * @doc: the document the chunk pertains to
13777 * @sax: the SAX handler bloc (possibly NULL)
13778 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13779 * @depth: Used for loop detection, use 0
13780 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13781 * @lst: the return value for the set of parsed nodes
13782 * @recover: return nodes even if the data is broken (use 0)
13783 *
13784 *
13785 * Parse a well-balanced chunk of an XML document
13786 * called by the parser
13787 * The allowed sequence for the Well Balanced Chunk is the one defined by
13788 * the content production in the XML grammar:
13789 *
13790 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13791 *
13792 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13793 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013794 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013795 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013796 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13797 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013798 */
13799int
13800xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013801 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013802 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013803 xmlParserCtxtPtr ctxt;
13804 xmlDocPtr newDoc;
13805 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013806 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013807 int size;
13808 int ret = 0;
13809
Daniel Veillard0161e632008-08-28 15:36:32 +000013810 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013811 return(XML_ERR_ENTITY_LOOP);
13812 }
13813
13814
Daniel Veillardcda96922001-08-21 10:56:31 +000013815 if (lst != NULL)
13816 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013817 if (string == NULL)
13818 return(-1);
13819
13820 size = xmlStrlen(string);
13821
13822 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13823 if (ctxt == NULL) return(-1);
13824 ctxt->userData = ctxt;
13825 if (sax != NULL) {
13826 oldsax = ctxt->sax;
13827 ctxt->sax = sax;
13828 if (user_data != NULL)
13829 ctxt->userData = user_data;
13830 }
13831 newDoc = xmlNewDoc(BAD_CAST "1.0");
13832 if (newDoc == NULL) {
13833 xmlFreeParserCtxt(ctxt);
13834 return(-1);
13835 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013836 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013837 if ((doc != NULL) && (doc->dict != NULL)) {
13838 xmlDictFree(ctxt->dict);
13839 ctxt->dict = doc->dict;
13840 xmlDictReference(ctxt->dict);
13841 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13842 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13843 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13844 ctxt->dictNames = 1;
13845 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013846 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013847 }
Owen Taylor3473f882001-02-23 17:55:21 +000013848 if (doc != NULL) {
13849 newDoc->intSubset = doc->intSubset;
13850 newDoc->extSubset = doc->extSubset;
13851 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013852 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13853 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013854 if (sax != NULL)
13855 ctxt->sax = oldsax;
13856 xmlFreeParserCtxt(ctxt);
13857 newDoc->intSubset = NULL;
13858 newDoc->extSubset = NULL;
13859 xmlFreeDoc(newDoc);
13860 return(-1);
13861 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013862 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13863 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013864 if (doc == NULL) {
13865 ctxt->myDoc = newDoc;
13866 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013867 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013868 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013869 /* Ensure that doc has XML spec namespace */
13870 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13871 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013872 }
13873 ctxt->instate = XML_PARSER_CONTENT;
13874 ctxt->depth = depth;
13875
13876 /*
13877 * Doing validity checking on chunk doesn't make sense
13878 */
13879 ctxt->validate = 0;
13880 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013881 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013882
Daniel Veillardb39bc392002-10-26 19:29:51 +000013883 if ( doc != NULL ){
13884 content = doc->children;
13885 doc->children = NULL;
13886 xmlParseContent(ctxt);
13887 doc->children = content;
13888 }
13889 else {
13890 xmlParseContent(ctxt);
13891 }
Owen Taylor3473f882001-02-23 17:55:21 +000013892 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013893 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013894 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013895 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013896 }
13897 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013898 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013899 }
13900
13901 if (!ctxt->wellFormed) {
13902 if (ctxt->errNo == 0)
13903 ret = 1;
13904 else
13905 ret = ctxt->errNo;
13906 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013907 ret = 0;
13908 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013909
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013910 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13911 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013912
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013913 /*
13914 * Return the newly created nodeset after unlinking it from
13915 * they pseudo parent.
13916 */
13917 cur = newDoc->children->children;
13918 *lst = cur;
13919 while (cur != NULL) {
13920 xmlSetTreeDoc(cur, doc);
13921 cur->parent = NULL;
13922 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013923 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013924 newDoc->children->children = NULL;
13925 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013926
13927 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013928 ctxt->sax = oldsax;
13929 xmlFreeParserCtxt(ctxt);
13930 newDoc->intSubset = NULL;
13931 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013932 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013933 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013934
Owen Taylor3473f882001-02-23 17:55:21 +000013935 return(ret);
13936}
13937
13938/**
13939 * xmlSAXParseEntity:
13940 * @sax: the SAX handler block
13941 * @filename: the filename
13942 *
13943 * parse an XML external entity out of context and build a tree.
13944 * It use the given SAX function block to handle the parsing callback.
13945 * If sax is NULL, fallback to the default DOM tree building routines.
13946 *
13947 * [78] extParsedEnt ::= TextDecl? content
13948 *
13949 * This correspond to a "Well Balanced" chunk
13950 *
13951 * Returns the resulting document tree
13952 */
13953
13954xmlDocPtr
13955xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13956 xmlDocPtr ret;
13957 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013958
13959 ctxt = xmlCreateFileParserCtxt(filename);
13960 if (ctxt == NULL) {
13961 return(NULL);
13962 }
13963 if (sax != NULL) {
13964 if (ctxt->sax != NULL)
13965 xmlFree(ctxt->sax);
13966 ctxt->sax = sax;
13967 ctxt->userData = NULL;
13968 }
13969
Owen Taylor3473f882001-02-23 17:55:21 +000013970 xmlParseExtParsedEnt(ctxt);
13971
13972 if (ctxt->wellFormed)
13973 ret = ctxt->myDoc;
13974 else {
13975 ret = NULL;
13976 xmlFreeDoc(ctxt->myDoc);
13977 ctxt->myDoc = NULL;
13978 }
13979 if (sax != NULL)
13980 ctxt->sax = NULL;
13981 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013982
Owen Taylor3473f882001-02-23 17:55:21 +000013983 return(ret);
13984}
13985
13986/**
13987 * xmlParseEntity:
13988 * @filename: the filename
13989 *
13990 * parse an XML external entity out of context and build a tree.
13991 *
13992 * [78] extParsedEnt ::= TextDecl? content
13993 *
13994 * This correspond to a "Well Balanced" chunk
13995 *
13996 * Returns the resulting document tree
13997 */
13998
13999xmlDocPtr
14000xmlParseEntity(const char *filename) {
14001 return(xmlSAXParseEntity(NULL, filename));
14002}
Daniel Veillard81273902003-09-30 00:43:48 +000014003#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014004
14005/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014006 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014007 * @URL: the entity URL
14008 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014009 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014010 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014011 *
14012 * Create a parser context for an external entity
14013 * Automatic support for ZLIB/Compress compressed document is provided
14014 * by default if found at compile-time.
14015 *
14016 * Returns the new parser context or NULL
14017 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014018static xmlParserCtxtPtr
14019xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14020 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014021 xmlParserCtxtPtr ctxt;
14022 xmlParserInputPtr inputStream;
14023 char *directory = NULL;
14024 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014025
Owen Taylor3473f882001-02-23 17:55:21 +000014026 ctxt = xmlNewParserCtxt();
14027 if (ctxt == NULL) {
14028 return(NULL);
14029 }
14030
Daniel Veillard48247b42009-07-10 16:12:46 +020014031 if (pctx != NULL) {
14032 ctxt->options = pctx->options;
14033 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014034 }
14035
Owen Taylor3473f882001-02-23 17:55:21 +000014036 uri = xmlBuildURI(URL, base);
14037
14038 if (uri == NULL) {
14039 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14040 if (inputStream == NULL) {
14041 xmlFreeParserCtxt(ctxt);
14042 return(NULL);
14043 }
14044
14045 inputPush(ctxt, inputStream);
14046
14047 if ((ctxt->directory == NULL) && (directory == NULL))
14048 directory = xmlParserGetDirectory((char *)URL);
14049 if ((ctxt->directory == NULL) && (directory != NULL))
14050 ctxt->directory = directory;
14051 } else {
14052 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14053 if (inputStream == NULL) {
14054 xmlFree(uri);
14055 xmlFreeParserCtxt(ctxt);
14056 return(NULL);
14057 }
14058
14059 inputPush(ctxt, inputStream);
14060
14061 if ((ctxt->directory == NULL) && (directory == NULL))
14062 directory = xmlParserGetDirectory((char *)uri);
14063 if ((ctxt->directory == NULL) && (directory != NULL))
14064 ctxt->directory = directory;
14065 xmlFree(uri);
14066 }
Owen Taylor3473f882001-02-23 17:55:21 +000014067 return(ctxt);
14068}
14069
Rob Richards9c0aa472009-03-26 18:10:19 +000014070/**
14071 * xmlCreateEntityParserCtxt:
14072 * @URL: the entity URL
14073 * @ID: the entity PUBLIC ID
14074 * @base: a possible base for the target URI
14075 *
14076 * Create a parser context for an external entity
14077 * Automatic support for ZLIB/Compress compressed document is provided
14078 * by default if found at compile-time.
14079 *
14080 * Returns the new parser context or NULL
14081 */
14082xmlParserCtxtPtr
14083xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14084 const xmlChar *base) {
14085 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14086
14087}
14088
Owen Taylor3473f882001-02-23 17:55:21 +000014089/************************************************************************
14090 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014091 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014092 * *
14093 ************************************************************************/
14094
14095/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014096 * xmlCreateURLParserCtxt:
14097 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014098 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014099 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014100 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014101 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014102 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014103 *
14104 * Returns the new parser context or NULL
14105 */
14106xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014107xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014108{
14109 xmlParserCtxtPtr ctxt;
14110 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014111 char *directory = NULL;
14112
Owen Taylor3473f882001-02-23 17:55:21 +000014113 ctxt = xmlNewParserCtxt();
14114 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014115 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014116 return(NULL);
14117 }
14118
Daniel Veillarddf292f72005-01-16 19:00:15 +000014119 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014120 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014121 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014122
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014123 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014124 if (inputStream == NULL) {
14125 xmlFreeParserCtxt(ctxt);
14126 return(NULL);
14127 }
14128
Owen Taylor3473f882001-02-23 17:55:21 +000014129 inputPush(ctxt, inputStream);
14130 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014131 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014132 if ((ctxt->directory == NULL) && (directory != NULL))
14133 ctxt->directory = directory;
14134
14135 return(ctxt);
14136}
14137
Daniel Veillard61b93382003-11-03 14:28:31 +000014138/**
14139 * xmlCreateFileParserCtxt:
14140 * @filename: the filename
14141 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014142 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014143 * Automatic support for ZLIB/Compress compressed document is provided
14144 * by default if found at compile-time.
14145 *
14146 * Returns the new parser context or NULL
14147 */
14148xmlParserCtxtPtr
14149xmlCreateFileParserCtxt(const char *filename)
14150{
14151 return(xmlCreateURLParserCtxt(filename, 0));
14152}
14153
Daniel Veillard81273902003-09-30 00:43:48 +000014154#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014155/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014156 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014157 * @sax: the SAX handler block
14158 * @filename: the filename
14159 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14160 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014161 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014162 *
14163 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14164 * compressed document is provided by default if found at compile-time.
14165 * It use the given SAX function block to handle the parsing callback.
14166 * If sax is NULL, fallback to the default DOM tree building routines.
14167 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014168 * User data (void *) is stored within the parser context in the
14169 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014170 *
Owen Taylor3473f882001-02-23 17:55:21 +000014171 * Returns the resulting document tree
14172 */
14173
14174xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014175xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14176 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014177 xmlDocPtr ret;
14178 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014179
Daniel Veillard635ef722001-10-29 11:48:19 +000014180 xmlInitParser();
14181
Owen Taylor3473f882001-02-23 17:55:21 +000014182 ctxt = xmlCreateFileParserCtxt(filename);
14183 if (ctxt == NULL) {
14184 return(NULL);
14185 }
14186 if (sax != NULL) {
14187 if (ctxt->sax != NULL)
14188 xmlFree(ctxt->sax);
14189 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014190 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014191 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014192 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014193 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014194 }
Owen Taylor3473f882001-02-23 17:55:21 +000014195
Daniel Veillard37d2d162008-03-14 10:54:00 +000014196 if (ctxt->directory == NULL)
14197 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014198
Daniel Veillarddad3f682002-11-17 16:47:27 +000014199 ctxt->recovery = recovery;
14200
Owen Taylor3473f882001-02-23 17:55:21 +000014201 xmlParseDocument(ctxt);
14202
William M. Brackc07329e2003-09-08 01:57:30 +000014203 if ((ctxt->wellFormed) || recovery) {
14204 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014205 if (ret != NULL) {
14206 if (ctxt->input->buf->compressed > 0)
14207 ret->compression = 9;
14208 else
14209 ret->compression = ctxt->input->buf->compressed;
14210 }
William M. Brackc07329e2003-09-08 01:57:30 +000014211 }
Owen Taylor3473f882001-02-23 17:55:21 +000014212 else {
14213 ret = NULL;
14214 xmlFreeDoc(ctxt->myDoc);
14215 ctxt->myDoc = NULL;
14216 }
14217 if (sax != NULL)
14218 ctxt->sax = NULL;
14219 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014220
Owen Taylor3473f882001-02-23 17:55:21 +000014221 return(ret);
14222}
14223
14224/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014225 * xmlSAXParseFile:
14226 * @sax: the SAX handler block
14227 * @filename: the filename
14228 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14229 * documents
14230 *
14231 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14232 * compressed document is provided by default if found at compile-time.
14233 * It use the given SAX function block to handle the parsing callback.
14234 * If sax is NULL, fallback to the default DOM tree building routines.
14235 *
14236 * Returns the resulting document tree
14237 */
14238
14239xmlDocPtr
14240xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14241 int recovery) {
14242 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14243}
14244
14245/**
Owen Taylor3473f882001-02-23 17:55:21 +000014246 * xmlRecoverDoc:
14247 * @cur: a pointer to an array of xmlChar
14248 *
14249 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014250 * In the case the document is not Well Formed, a attempt to build a
14251 * tree is tried anyway
14252 *
14253 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014254 */
14255
14256xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014257xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014258 return(xmlSAXParseDoc(NULL, cur, 1));
14259}
14260
14261/**
14262 * xmlParseFile:
14263 * @filename: the filename
14264 *
14265 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14266 * compressed document is provided by default if found at compile-time.
14267 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014268 * Returns the resulting document tree if the file was wellformed,
14269 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014270 */
14271
14272xmlDocPtr
14273xmlParseFile(const char *filename) {
14274 return(xmlSAXParseFile(NULL, filename, 0));
14275}
14276
14277/**
14278 * xmlRecoverFile:
14279 * @filename: the filename
14280 *
14281 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14282 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014283 * In the case the document is not Well Formed, it attempts to build
14284 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014285 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014286 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014287 */
14288
14289xmlDocPtr
14290xmlRecoverFile(const char *filename) {
14291 return(xmlSAXParseFile(NULL, filename, 1));
14292}
14293
14294
14295/**
14296 * xmlSetupParserForBuffer:
14297 * @ctxt: an XML parser context
14298 * @buffer: a xmlChar * buffer
14299 * @filename: a file name
14300 *
14301 * Setup the parser context to parse a new buffer; Clears any prior
14302 * contents from the parser context. The buffer parameter must not be
14303 * NULL, but the filename parameter can be
14304 */
14305void
14306xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14307 const char* filename)
14308{
14309 xmlParserInputPtr input;
14310
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014311 if ((ctxt == NULL) || (buffer == NULL))
14312 return;
14313
Owen Taylor3473f882001-02-23 17:55:21 +000014314 input = xmlNewInputStream(ctxt);
14315 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014316 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014317 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014318 return;
14319 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014320
Owen Taylor3473f882001-02-23 17:55:21 +000014321 xmlClearParserCtxt(ctxt);
14322 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014323 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014324 input->base = buffer;
14325 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014326 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014327 inputPush(ctxt, input);
14328}
14329
14330/**
14331 * xmlSAXUserParseFile:
14332 * @sax: a SAX handler
14333 * @user_data: The user data returned on SAX callbacks
14334 * @filename: a file name
14335 *
14336 * parse an XML file and call the given SAX handler routines.
14337 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014338 *
Owen Taylor3473f882001-02-23 17:55:21 +000014339 * Returns 0 in case of success or a error number otherwise
14340 */
14341int
14342xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14343 const char *filename) {
14344 int ret = 0;
14345 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014346
Owen Taylor3473f882001-02-23 17:55:21 +000014347 ctxt = xmlCreateFileParserCtxt(filename);
14348 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014349 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014350 xmlFree(ctxt->sax);
14351 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014352 xmlDetectSAX2(ctxt);
14353
Owen Taylor3473f882001-02-23 17:55:21 +000014354 if (user_data != NULL)
14355 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014356
Owen Taylor3473f882001-02-23 17:55:21 +000014357 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014358
Owen Taylor3473f882001-02-23 17:55:21 +000014359 if (ctxt->wellFormed)
14360 ret = 0;
14361 else {
14362 if (ctxt->errNo != 0)
14363 ret = ctxt->errNo;
14364 else
14365 ret = -1;
14366 }
14367 if (sax != NULL)
14368 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014369 if (ctxt->myDoc != NULL) {
14370 xmlFreeDoc(ctxt->myDoc);
14371 ctxt->myDoc = NULL;
14372 }
Owen Taylor3473f882001-02-23 17:55:21 +000014373 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014374
Owen Taylor3473f882001-02-23 17:55:21 +000014375 return ret;
14376}
Daniel Veillard81273902003-09-30 00:43:48 +000014377#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014378
14379/************************************************************************
14380 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014381 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014382 * *
14383 ************************************************************************/
14384
14385/**
14386 * xmlCreateMemoryParserCtxt:
14387 * @buffer: a pointer to a char array
14388 * @size: the size of the array
14389 *
14390 * Create a parser context for an XML in-memory document.
14391 *
14392 * Returns the new parser context or NULL
14393 */
14394xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014395xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014396 xmlParserCtxtPtr ctxt;
14397 xmlParserInputPtr input;
14398 xmlParserInputBufferPtr buf;
14399
14400 if (buffer == NULL)
14401 return(NULL);
14402 if (size <= 0)
14403 return(NULL);
14404
14405 ctxt = xmlNewParserCtxt();
14406 if (ctxt == NULL)
14407 return(NULL);
14408
Daniel Veillard53350552003-09-18 13:35:51 +000014409 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014410 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014411 if (buf == NULL) {
14412 xmlFreeParserCtxt(ctxt);
14413 return(NULL);
14414 }
Owen Taylor3473f882001-02-23 17:55:21 +000014415
14416 input = xmlNewInputStream(ctxt);
14417 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014418 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014419 xmlFreeParserCtxt(ctxt);
14420 return(NULL);
14421 }
14422
14423 input->filename = NULL;
14424 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014425 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014426
14427 inputPush(ctxt, input);
14428 return(ctxt);
14429}
14430
Daniel Veillard81273902003-09-30 00:43:48 +000014431#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014432/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014433 * xmlSAXParseMemoryWithData:
14434 * @sax: the SAX handler block
14435 * @buffer: an pointer to a char array
14436 * @size: the size of the array
14437 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14438 * documents
14439 * @data: the userdata
14440 *
14441 * parse an XML in-memory block and use the given SAX function block
14442 * to handle the parsing callback. If sax is NULL, fallback to the default
14443 * DOM tree building routines.
14444 *
14445 * User data (void *) is stored within the parser context in the
14446 * context's _private member, so it is available nearly everywhere in libxml
14447 *
14448 * Returns the resulting document tree
14449 */
14450
14451xmlDocPtr
14452xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14453 int size, int recovery, void *data) {
14454 xmlDocPtr ret;
14455 xmlParserCtxtPtr ctxt;
14456
Daniel Veillardab2a7632009-07-09 08:45:03 +020014457 xmlInitParser();
14458
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014459 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14460 if (ctxt == NULL) return(NULL);
14461 if (sax != NULL) {
14462 if (ctxt->sax != NULL)
14463 xmlFree(ctxt->sax);
14464 ctxt->sax = sax;
14465 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014466 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014467 if (data!=NULL) {
14468 ctxt->_private=data;
14469 }
14470
Daniel Veillardadba5f12003-04-04 16:09:01 +000014471 ctxt->recovery = recovery;
14472
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014473 xmlParseDocument(ctxt);
14474
14475 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14476 else {
14477 ret = NULL;
14478 xmlFreeDoc(ctxt->myDoc);
14479 ctxt->myDoc = NULL;
14480 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014481 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014482 ctxt->sax = NULL;
14483 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014484
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014485 return(ret);
14486}
14487
14488/**
Owen Taylor3473f882001-02-23 17:55:21 +000014489 * xmlSAXParseMemory:
14490 * @sax: the SAX handler block
14491 * @buffer: an pointer to a char array
14492 * @size: the size of the array
14493 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14494 * documents
14495 *
14496 * parse an XML in-memory block and use the given SAX function block
14497 * to handle the parsing callback. If sax is NULL, fallback to the default
14498 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014499 *
Owen Taylor3473f882001-02-23 17:55:21 +000014500 * Returns the resulting document tree
14501 */
14502xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014503xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14504 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014505 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014506}
14507
14508/**
14509 * xmlParseMemory:
14510 * @buffer: an pointer to a char array
14511 * @size: the size of the array
14512 *
14513 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014514 *
Owen Taylor3473f882001-02-23 17:55:21 +000014515 * Returns the resulting document tree
14516 */
14517
Daniel Veillard50822cb2001-07-26 20:05:51 +000014518xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014519 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14520}
14521
14522/**
14523 * xmlRecoverMemory:
14524 * @buffer: an pointer to a char array
14525 * @size: the size of the array
14526 *
14527 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014528 * In the case the document is not Well Formed, an attempt to
14529 * build a tree is tried anyway
14530 *
14531 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014532 */
14533
Daniel Veillard50822cb2001-07-26 20:05:51 +000014534xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014535 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14536}
14537
14538/**
14539 * xmlSAXUserParseMemory:
14540 * @sax: a SAX handler
14541 * @user_data: The user data returned on SAX callbacks
14542 * @buffer: an in-memory XML document input
14543 * @size: the length of the XML document in bytes
14544 *
14545 * A better SAX parsing routine.
14546 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014547 *
Owen Taylor3473f882001-02-23 17:55:21 +000014548 * Returns 0 in case of success or a error number otherwise
14549 */
14550int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014551 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014552 int ret = 0;
14553 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014554
14555 xmlInitParser();
14556
Owen Taylor3473f882001-02-23 17:55:21 +000014557 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14558 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014559 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14560 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014561 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014562 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014563
Daniel Veillard30211a02001-04-26 09:33:18 +000014564 if (user_data != NULL)
14565 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014566
Owen Taylor3473f882001-02-23 17:55:21 +000014567 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014568
Owen Taylor3473f882001-02-23 17:55:21 +000014569 if (ctxt->wellFormed)
14570 ret = 0;
14571 else {
14572 if (ctxt->errNo != 0)
14573 ret = ctxt->errNo;
14574 else
14575 ret = -1;
14576 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014577 if (sax != NULL)
14578 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014579 if (ctxt->myDoc != NULL) {
14580 xmlFreeDoc(ctxt->myDoc);
14581 ctxt->myDoc = NULL;
14582 }
Owen Taylor3473f882001-02-23 17:55:21 +000014583 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014584
Owen Taylor3473f882001-02-23 17:55:21 +000014585 return ret;
14586}
Daniel Veillard81273902003-09-30 00:43:48 +000014587#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014588
14589/**
14590 * xmlCreateDocParserCtxt:
14591 * @cur: a pointer to an array of xmlChar
14592 *
14593 * Creates a parser context for an XML in-memory document.
14594 *
14595 * Returns the new parser context or NULL
14596 */
14597xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014598xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014599 int len;
14600
14601 if (cur == NULL)
14602 return(NULL);
14603 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014604 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014605}
14606
Daniel Veillard81273902003-09-30 00:43:48 +000014607#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014608/**
14609 * xmlSAXParseDoc:
14610 * @sax: the SAX handler block
14611 * @cur: a pointer to an array of xmlChar
14612 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14613 * documents
14614 *
14615 * parse an XML in-memory document and build a tree.
14616 * It use the given SAX function block to handle the parsing callback.
14617 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014618 *
Owen Taylor3473f882001-02-23 17:55:21 +000014619 * Returns the resulting document tree
14620 */
14621
14622xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014623xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014624 xmlDocPtr ret;
14625 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014626 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014627
Daniel Veillard38936062004-11-04 17:45:11 +000014628 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014629
14630
14631 ctxt = xmlCreateDocParserCtxt(cur);
14632 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014633 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014634 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014635 ctxt->sax = sax;
14636 ctxt->userData = NULL;
14637 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014638 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014639
14640 xmlParseDocument(ctxt);
14641 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14642 else {
14643 ret = NULL;
14644 xmlFreeDoc(ctxt->myDoc);
14645 ctxt->myDoc = NULL;
14646 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014647 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014648 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014649 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014650
Owen Taylor3473f882001-02-23 17:55:21 +000014651 return(ret);
14652}
14653
14654/**
14655 * xmlParseDoc:
14656 * @cur: a pointer to an array of xmlChar
14657 *
14658 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014659 *
Owen Taylor3473f882001-02-23 17:55:21 +000014660 * Returns the resulting document tree
14661 */
14662
14663xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014664xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014665 return(xmlSAXParseDoc(NULL, cur, 0));
14666}
Daniel Veillard81273902003-09-30 00:43:48 +000014667#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014668
Daniel Veillard81273902003-09-30 00:43:48 +000014669#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014670/************************************************************************
14671 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014672 * Specific function to keep track of entities references *
14673 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014674 * *
14675 ************************************************************************/
14676
14677static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14678
14679/**
14680 * xmlAddEntityReference:
14681 * @ent : A valid entity
14682 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014683 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014684 *
14685 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14686 */
14687static void
14688xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14689 xmlNodePtr lastNode)
14690{
14691 if (xmlEntityRefFunc != NULL) {
14692 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14693 }
14694}
14695
14696
14697/**
14698 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014699 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014700 *
14701 * Set the function to call call back when a xml reference has been made
14702 */
14703void
14704xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14705{
14706 xmlEntityRefFunc = func;
14707}
Daniel Veillard81273902003-09-30 00:43:48 +000014708#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014709
14710/************************************************************************
14711 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014712 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014713 * *
14714 ************************************************************************/
14715
14716#ifdef LIBXML_XPATH_ENABLED
14717#include <libxml/xpath.h>
14718#endif
14719
Daniel Veillardffa3c742005-07-21 13:24:09 +000014720extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014721static int xmlParserInitialized = 0;
14722
14723/**
14724 * xmlInitParser:
14725 *
14726 * Initialization function for the XML parser.
14727 * This is not reentrant. Call once before processing in case of
14728 * use in multithreaded programs.
14729 */
14730
14731void
14732xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014733 if (xmlParserInitialized != 0)
14734 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014735
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014736#ifdef LIBXML_THREAD_ENABLED
14737 __xmlGlobalInitMutexLock();
14738 if (xmlParserInitialized == 0) {
14739#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014740 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014741 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014742 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14743 (xmlGenericError == NULL))
14744 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014745 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014746 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014747 xmlInitCharEncodingHandlers();
14748 xmlDefaultSAXHandlerInit();
14749 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014750#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014751 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014752#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014753#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014754 htmlInitAutoClose();
14755 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014756#endif
14757#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014758 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014759#endif
Daniel Veillard054c7162014-01-26 15:02:25 +010014760#ifdef LIBXML_CATALOG_ENABLED
14761 xmlInitializeCatalog();
14762#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014763 xmlParserInitialized = 1;
14764#ifdef LIBXML_THREAD_ENABLED
14765 }
14766 __xmlGlobalInitMutexUnlock();
14767#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014768}
14769
14770/**
14771 * xmlCleanupParser:
14772 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014773 * This function name is somewhat misleading. It does not clean up
14774 * parser state, it cleans up memory allocated by the library itself.
14775 * It is a cleanup function for the XML library. It tries to reclaim all
14776 * related global memory allocated for the library processing.
14777 * It doesn't deallocate any document related memory. One should
14778 * call xmlCleanupParser() only when the process has finished using
14779 * the library and all XML/HTML documents built with it.
14780 * See also xmlInitParser() which has the opposite function of preparing
14781 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014782 *
14783 * WARNING: if your application is multithreaded or has plugin support
14784 * calling this may crash the application if another thread or
14785 * a plugin is still using libxml2. It's sometimes very hard to
14786 * guess if libxml2 is in use in the application, some libraries
14787 * or plugins may use it without notice. In case of doubt abstain
14788 * from calling this function or do it just before calling exit()
14789 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014790 */
14791
14792void
14793xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014794 if (!xmlParserInitialized)
14795 return;
14796
Owen Taylor3473f882001-02-23 17:55:21 +000014797 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014798#ifdef LIBXML_CATALOG_ENABLED
14799 xmlCatalogCleanup();
14800#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014801 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014802 xmlCleanupInputCallbacks();
14803#ifdef LIBXML_OUTPUT_ENABLED
14804 xmlCleanupOutputCallbacks();
14805#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014806#ifdef LIBXML_SCHEMAS_ENABLED
14807 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014808 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014809#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014810 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014811 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014812 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014813 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014814 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014815}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014816
14817/************************************************************************
14818 * *
14819 * New set (2.6.0) of simpler and more flexible APIs *
14820 * *
14821 ************************************************************************/
14822
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored. The caller must have a
 * variable named "dict" in scope (it may be NULL, in which case the
 * string is always freed).
 *
 * Wrapped in do { } while (0) so that the expansion is a single
 * statement and can be used safely inside an unbraced if/else; the
 * caller supplies the terminating semicolon.
 */
#define DICT_FREE(str)							\
    do {								\
	if ((str) && ((!dict) ||					\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))		\
	    xmlFree((char *)(str));					\
    } while (0)
14834
14835/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014836 * xmlCtxtReset:
14837 * @ctxt: an XML parser context
14838 *
14839 * Reset a parser context
14840 */
14841void
14842xmlCtxtReset(xmlParserCtxtPtr ctxt)
14843{
14844 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014845 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014846
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014847 if (ctxt == NULL)
14848 return;
14849
14850 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014851
14852 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14853 xmlFreeInputStream(input);
14854 }
14855 ctxt->inputNr = 0;
14856 ctxt->input = NULL;
14857
14858 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014859 if (ctxt->spaceTab != NULL) {
14860 ctxt->spaceTab[0] = -1;
14861 ctxt->space = &ctxt->spaceTab[0];
14862 } else {
14863 ctxt->space = NULL;
14864 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014865
14866
14867 ctxt->nodeNr = 0;
14868 ctxt->node = NULL;
14869
14870 ctxt->nameNr = 0;
14871 ctxt->name = NULL;
14872
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014873 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014874 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014875 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014876 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014877 DICT_FREE(ctxt->directory);
14878 ctxt->directory = NULL;
14879 DICT_FREE(ctxt->extSubURI);
14880 ctxt->extSubURI = NULL;
14881 DICT_FREE(ctxt->extSubSystem);
14882 ctxt->extSubSystem = NULL;
14883 if (ctxt->myDoc != NULL)
14884 xmlFreeDoc(ctxt->myDoc);
14885 ctxt->myDoc = NULL;
14886
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014887 ctxt->standalone = -1;
14888 ctxt->hasExternalSubset = 0;
14889 ctxt->hasPErefs = 0;
14890 ctxt->html = 0;
14891 ctxt->external = 0;
14892 ctxt->instate = XML_PARSER_START;
14893 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014894
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014895 ctxt->wellFormed = 1;
14896 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014897 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014898 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014899#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014900 ctxt->vctxt.userData = ctxt;
14901 ctxt->vctxt.error = xmlParserValidityError;
14902 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014903#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014904 ctxt->record_info = 0;
14905 ctxt->nbChars = 0;
14906 ctxt->checkIndex = 0;
14907 ctxt->inSubset = 0;
14908 ctxt->errNo = XML_ERR_OK;
14909 ctxt->depth = 0;
14910 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14911 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014912 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014913 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014914 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014915 xmlInitNodeInfoSeq(&ctxt->node_seq);
14916
14917 if (ctxt->attsDefault != NULL) {
14918 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14919 ctxt->attsDefault = NULL;
14920 }
14921 if (ctxt->attsSpecial != NULL) {
14922 xmlHashFree(ctxt->attsSpecial, NULL);
14923 ctxt->attsSpecial = NULL;
14924 }
14925
Daniel Veillard4432df22003-09-28 18:58:27 +000014926#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014927 if (ctxt->catalogs != NULL)
14928 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014929#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014930 if (ctxt->lastError.code != XML_ERR_OK)
14931 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014932}
14933
14934/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014935 * xmlCtxtResetPush:
14936 * @ctxt: an XML parser context
14937 * @chunk: a pointer to an array of chars
14938 * @size: number of chars in the array
14939 * @filename: an optional file name or URI
14940 * @encoding: the document encoding, or NULL
14941 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014942 * Reset a push parser context
14943 *
14944 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014945 */
14946int
14947xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14948 int size, const char *filename, const char *encoding)
14949{
14950 xmlParserInputPtr inputStream;
14951 xmlParserInputBufferPtr buf;
14952 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14953
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014954 if (ctxt == NULL)
14955 return(1);
14956
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014957 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14958 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14959
14960 buf = xmlAllocParserInputBuffer(enc);
14961 if (buf == NULL)
14962 return(1);
14963
14964 if (ctxt == NULL) {
14965 xmlFreeParserInputBuffer(buf);
14966 return(1);
14967 }
14968
14969 xmlCtxtReset(ctxt);
14970
14971 if (ctxt->pushTab == NULL) {
14972 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14973 sizeof(xmlChar *));
14974 if (ctxt->pushTab == NULL) {
14975 xmlErrMemory(ctxt, NULL);
14976 xmlFreeParserInputBuffer(buf);
14977 return(1);
14978 }
14979 }
14980
14981 if (filename == NULL) {
14982 ctxt->directory = NULL;
14983 } else {
14984 ctxt->directory = xmlParserGetDirectory(filename);
14985 }
14986
14987 inputStream = xmlNewInputStream(ctxt);
14988 if (inputStream == NULL) {
14989 xmlFreeParserInputBuffer(buf);
14990 return(1);
14991 }
14992
14993 if (filename == NULL)
14994 inputStream->filename = NULL;
14995 else
14996 inputStream->filename = (char *)
14997 xmlCanonicPath((const xmlChar *) filename);
14998 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014999 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015000
15001 inputPush(ctxt, inputStream);
15002
15003 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15004 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015005 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15006 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015007
15008 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15009
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015010 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015011#ifdef DEBUG_PUSH
15012 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15013#endif
15014 }
15015
15016 if (encoding != NULL) {
15017 xmlCharEncodingHandlerPtr hdlr;
15018
Daniel Veillard37334572008-07-31 08:20:02 +000015019 if (ctxt->encoding != NULL)
15020 xmlFree((xmlChar *) ctxt->encoding);
15021 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15022
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015023 hdlr = xmlFindCharEncodingHandler(encoding);
15024 if (hdlr != NULL) {
15025 xmlSwitchToEncoding(ctxt, hdlr);
15026 } else {
15027 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15028 "Unsupported encoding %s\n", BAD_CAST encoding);
15029 }
15030 } else if (enc != XML_CHAR_ENCODING_NONE) {
15031 xmlSwitchEncoding(ctxt, enc);
15032 }
15033
15034 return(0);
15035}
15036
Daniel Veillard37334572008-07-31 08:20:02 +000015037
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015038/**
Daniel Veillard37334572008-07-31 08:20:02 +000015039 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015040 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015041 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015042 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 *
15044 * Applies the options to the parser context
15045 *
15046 * Returns 0 in case of success, the set of unknown or unimplemented options
15047 * in case of error.
15048 */
Daniel Veillard37334572008-07-31 08:20:02 +000015049static int
15050xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015051{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015052 if (ctxt == NULL)
15053 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015054 if (encoding != NULL) {
15055 if (ctxt->encoding != NULL)
15056 xmlFree((xmlChar *) ctxt->encoding);
15057 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15058 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015059 if (options & XML_PARSE_RECOVER) {
15060 ctxt->recovery = 1;
15061 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015062 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015063 } else
15064 ctxt->recovery = 0;
15065 if (options & XML_PARSE_DTDLOAD) {
15066 ctxt->loadsubset = XML_DETECT_IDS;
15067 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015068 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015069 } else
15070 ctxt->loadsubset = 0;
15071 if (options & XML_PARSE_DTDATTR) {
15072 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15073 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015074 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015075 }
15076 if (options & XML_PARSE_NOENT) {
15077 ctxt->replaceEntities = 1;
15078 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15079 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015080 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015081 } else
15082 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015083 if (options & XML_PARSE_PEDANTIC) {
15084 ctxt->pedantic = 1;
15085 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015086 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015087 } else
15088 ctxt->pedantic = 0;
15089 if (options & XML_PARSE_NOBLANKS) {
15090 ctxt->keepBlanks = 0;
15091 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15092 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015093 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015094 } else
15095 ctxt->keepBlanks = 1;
15096 if (options & XML_PARSE_DTDVALID) {
15097 ctxt->validate = 1;
15098 if (options & XML_PARSE_NOWARNING)
15099 ctxt->vctxt.warning = NULL;
15100 if (options & XML_PARSE_NOERROR)
15101 ctxt->vctxt.error = NULL;
15102 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015103 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015104 } else
15105 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015106 if (options & XML_PARSE_NOWARNING) {
15107 ctxt->sax->warning = NULL;
15108 options -= XML_PARSE_NOWARNING;
15109 }
15110 if (options & XML_PARSE_NOERROR) {
15111 ctxt->sax->error = NULL;
15112 ctxt->sax->fatalError = NULL;
15113 options -= XML_PARSE_NOERROR;
15114 }
Daniel Veillard81273902003-09-30 00:43:48 +000015115#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015116 if (options & XML_PARSE_SAX1) {
15117 ctxt->sax->startElement = xmlSAX2StartElement;
15118 ctxt->sax->endElement = xmlSAX2EndElement;
15119 ctxt->sax->startElementNs = NULL;
15120 ctxt->sax->endElementNs = NULL;
15121 ctxt->sax->initialized = 1;
15122 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015123 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015124 }
Daniel Veillard81273902003-09-30 00:43:48 +000015125#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015126 if (options & XML_PARSE_NODICT) {
15127 ctxt->dictNames = 0;
15128 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015129 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015130 } else {
15131 ctxt->dictNames = 1;
15132 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015133 if (options & XML_PARSE_NOCDATA) {
15134 ctxt->sax->cdataBlock = NULL;
15135 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015136 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015137 }
15138 if (options & XML_PARSE_NSCLEAN) {
15139 ctxt->options |= XML_PARSE_NSCLEAN;
15140 options -= XML_PARSE_NSCLEAN;
15141 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015142 if (options & XML_PARSE_NONET) {
15143 ctxt->options |= XML_PARSE_NONET;
15144 options -= XML_PARSE_NONET;
15145 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015146 if (options & XML_PARSE_COMPACT) {
15147 ctxt->options |= XML_PARSE_COMPACT;
15148 options -= XML_PARSE_COMPACT;
15149 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015150 if (options & XML_PARSE_OLD10) {
15151 ctxt->options |= XML_PARSE_OLD10;
15152 options -= XML_PARSE_OLD10;
15153 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015154 if (options & XML_PARSE_NOBASEFIX) {
15155 ctxt->options |= XML_PARSE_NOBASEFIX;
15156 options -= XML_PARSE_NOBASEFIX;
15157 }
15158 if (options & XML_PARSE_HUGE) {
15159 ctxt->options |= XML_PARSE_HUGE;
15160 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015161 if (ctxt->dict != NULL)
15162 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015163 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015164 if (options & XML_PARSE_OLDSAX) {
15165 ctxt->options |= XML_PARSE_OLDSAX;
15166 options -= XML_PARSE_OLDSAX;
15167 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015168 if (options & XML_PARSE_IGNORE_ENC) {
15169 ctxt->options |= XML_PARSE_IGNORE_ENC;
15170 options -= XML_PARSE_IGNORE_ENC;
15171 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015172 if (options & XML_PARSE_BIG_LINES) {
15173 ctxt->options |= XML_PARSE_BIG_LINES;
15174 options -= XML_PARSE_BIG_LINES;
15175 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015176 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015177 return (options);
15178}
15179
15180/**
Daniel Veillard37334572008-07-31 08:20:02 +000015181 * xmlCtxtUseOptions:
15182 * @ctxt: an XML parser context
15183 * @options: a combination of xmlParserOption
15184 *
15185 * Applies the options to the parser context
15186 *
15187 * Returns 0 in case of success, the set of unknown or unimplemented options
15188 * in case of error.
15189 */
15190int
15191xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15192{
15193 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15194}
15195
15196/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015197 * xmlDoRead:
15198 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015199 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015200 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015201 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015202 * @reuse: keep the context for reuse
15203 *
15204 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015205 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015206 * Returns the resulting document tree or NULL
15207 */
15208static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015209xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15210 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015211{
15212 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015213
15214 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015215 if (encoding != NULL) {
15216 xmlCharEncodingHandlerPtr hdlr;
15217
15218 hdlr = xmlFindCharEncodingHandler(encoding);
15219 if (hdlr != NULL)
15220 xmlSwitchToEncoding(ctxt, hdlr);
15221 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015222 if ((URL != NULL) && (ctxt->input != NULL) &&
15223 (ctxt->input->filename == NULL))
15224 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015225 xmlParseDocument(ctxt);
15226 if ((ctxt->wellFormed) || ctxt->recovery)
15227 ret = ctxt->myDoc;
15228 else {
15229 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015230 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015231 xmlFreeDoc(ctxt->myDoc);
15232 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015233 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015234 ctxt->myDoc = NULL;
15235 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015236 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015237 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015238
15239 return (ret);
15240}
15241
15242/**
15243 * xmlReadDoc:
15244 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015245 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015246 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015247 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015248 *
15249 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015250 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015251 * Returns the resulting document tree
15252 */
15253xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015254xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015255{
15256 xmlParserCtxtPtr ctxt;
15257
15258 if (cur == NULL)
15259 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015260 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015261
15262 ctxt = xmlCreateDocParserCtxt(cur);
15263 if (ctxt == NULL)
15264 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015265 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266}
15267
15268/**
15269 * xmlReadFile:
15270 * @filename: a file or URL
15271 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015272 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015273 *
15274 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015275 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015276 * Returns the resulting document tree
15277 */
15278xmlDocPtr
15279xmlReadFile(const char *filename, const char *encoding, int options)
15280{
15281 xmlParserCtxtPtr ctxt;
15282
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015283 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015284 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015285 if (ctxt == NULL)
15286 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015287 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015288}
15289
15290/**
15291 * xmlReadMemory:
15292 * @buffer: a pointer to a char array
15293 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015294 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015295 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015296 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015297 *
15298 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015299 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015300 * Returns the resulting document tree
15301 */
15302xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015303xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015304{
15305 xmlParserCtxtPtr ctxt;
15306
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015307 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015308 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15309 if (ctxt == NULL)
15310 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015311 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015312}
15313
15314/**
15315 * xmlReadFd:
15316 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015317 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015318 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015319 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015320 *
15321 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015322 * NOTE that the file descriptor will not be closed when the
15323 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015324 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015325 * Returns the resulting document tree
15326 */
15327xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015328xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015329{
15330 xmlParserCtxtPtr ctxt;
15331 xmlParserInputBufferPtr input;
15332 xmlParserInputPtr stream;
15333
15334 if (fd < 0)
15335 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015336 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015337
15338 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15339 if (input == NULL)
15340 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015341 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015342 ctxt = xmlNewParserCtxt();
15343 if (ctxt == NULL) {
15344 xmlFreeParserInputBuffer(input);
15345 return (NULL);
15346 }
15347 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15348 if (stream == NULL) {
15349 xmlFreeParserInputBuffer(input);
15350 xmlFreeParserCtxt(ctxt);
15351 return (NULL);
15352 }
15353 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015354 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015355}
15356
15357/**
15358 * xmlReadIO:
15359 * @ioread: an I/O read function
15360 * @ioclose: an I/O close function
15361 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015362 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015363 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015364 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015365 *
15366 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015367 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015368 * Returns the resulting document tree
15369 */
15370xmlDocPtr
15371xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015372 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015373{
15374 xmlParserCtxtPtr ctxt;
15375 xmlParserInputBufferPtr input;
15376 xmlParserInputPtr stream;
15377
15378 if (ioread == NULL)
15379 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015380 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015381
15382 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15383 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015384 if (input == NULL) {
15385 if (ioclose != NULL)
15386 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015387 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015388 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015389 ctxt = xmlNewParserCtxt();
15390 if (ctxt == NULL) {
15391 xmlFreeParserInputBuffer(input);
15392 return (NULL);
15393 }
15394 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15395 if (stream == NULL) {
15396 xmlFreeParserInputBuffer(input);
15397 xmlFreeParserCtxt(ctxt);
15398 return (NULL);
15399 }
15400 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015401 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015402}
15403
15404/**
15405 * xmlCtxtReadDoc:
15406 * @ctxt: an XML parser context
15407 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015408 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015409 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015410 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015411 *
15412 * parse an XML in-memory document and build a tree.
15413 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015414 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015415 * Returns the resulting document tree
15416 */
15417xmlDocPtr
15418xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015419 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420{
15421 xmlParserInputPtr stream;
15422
15423 if (cur == NULL)
15424 return (NULL);
15425 if (ctxt == NULL)
15426 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015427 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428
15429 xmlCtxtReset(ctxt);
15430
15431 stream = xmlNewStringInputStream(ctxt, cur);
15432 if (stream == NULL) {
15433 return (NULL);
15434 }
15435 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015436 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015437}
15438
15439/**
15440 * xmlCtxtReadFile:
15441 * @ctxt: an XML parser context
15442 * @filename: a file or URL
15443 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015444 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015445 *
15446 * parse an XML file from the filesystem or the network.
15447 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015448 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015449 * Returns the resulting document tree
15450 */
15451xmlDocPtr
15452xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15453 const char *encoding, int options)
15454{
15455 xmlParserInputPtr stream;
15456
15457 if (filename == NULL)
15458 return (NULL);
15459 if (ctxt == NULL)
15460 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015461 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015462
15463 xmlCtxtReset(ctxt);
15464
Daniel Veillard29614c72004-11-26 10:47:26 +000015465 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015466 if (stream == NULL) {
15467 return (NULL);
15468 }
15469 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015470 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015471}
15472
15473/**
15474 * xmlCtxtReadMemory:
15475 * @ctxt: an XML parser context
15476 * @buffer: a pointer to a char array
15477 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015478 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015479 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015480 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015481 *
15482 * parse an XML in-memory document and build a tree.
15483 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015484 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015485 * Returns the resulting document tree
15486 */
15487xmlDocPtr
15488xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015489 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015490{
15491 xmlParserInputBufferPtr input;
15492 xmlParserInputPtr stream;
15493
15494 if (ctxt == NULL)
15495 return (NULL);
15496 if (buffer == NULL)
15497 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015498 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015499
15500 xmlCtxtReset(ctxt);
15501
15502 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15503 if (input == NULL) {
15504 return(NULL);
15505 }
15506
15507 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15508 if (stream == NULL) {
15509 xmlFreeParserInputBuffer(input);
15510 return(NULL);
15511 }
15512
15513 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015514 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015515}
15516
15517/**
15518 * xmlCtxtReadFd:
15519 * @ctxt: an XML parser context
15520 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015521 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015522 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015523 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015524 *
15525 * parse an XML from a file descriptor and build a tree.
15526 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015527 * NOTE that the file descriptor will not be closed when the
15528 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015529 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015530 * Returns the resulting document tree
15531 */
15532xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015533xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15534 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015535{
15536 xmlParserInputBufferPtr input;
15537 xmlParserInputPtr stream;
15538
15539 if (fd < 0)
15540 return (NULL);
15541 if (ctxt == NULL)
15542 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015543 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015544
15545 xmlCtxtReset(ctxt);
15546
15547
15548 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15549 if (input == NULL)
15550 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015551 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015552 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15553 if (stream == NULL) {
15554 xmlFreeParserInputBuffer(input);
15555 return (NULL);
15556 }
15557 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015558 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015559}
15560
15561/**
15562 * xmlCtxtReadIO:
15563 * @ctxt: an XML parser context
15564 * @ioread: an I/O read function
15565 * @ioclose: an I/O close function
15566 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015567 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015568 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015569 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015570 *
15571 * parse an XML document from I/O functions and source and build a tree.
15572 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015573 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015574 * Returns the resulting document tree
15575 */
15576xmlDocPtr
15577xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15578 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015579 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015580 const char *encoding, int options)
15581{
15582 xmlParserInputBufferPtr input;
15583 xmlParserInputPtr stream;
15584
15585 if (ioread == NULL)
15586 return (NULL);
15587 if (ctxt == NULL)
15588 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015589 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015590
15591 xmlCtxtReset(ctxt);
15592
15593 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15594 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015595 if (input == NULL) {
15596 if (ioclose != NULL)
15597 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015598 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015599 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015600 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15601 if (stream == NULL) {
15602 xmlFreeParserInputBuffer(input);
15603 return (NULL);
15604 }
15605 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015606 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015607}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015608
15609#define bottom_parser
15610#include "elfgcchack.h"