blob: ddf3b5bd40ac985b133f0049b6186a24ade017f1 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
103#define XML_PARSER_BIG_ENTITY 1000
104#define XML_PARSER_LOT_ENTITY 5000
105
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
112#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800125 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
135 return(0);
136
137 /*
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
141 */
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
145 }
146 consumed += ctxt->sizeentities;
147
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
149 return(0);
150 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000151 /*
152 * Do the check based on the replacement size of the entity
153 */
154 if (size < XML_PARSER_BIG_ENTITY)
155 return(0);
156
157 /*
158 * A limit on the amount of text data reasonably used
159 */
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
163 }
164 consumed += ctxt->sizeentities;
165
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168 return (0);
169 } else if (ent != NULL) {
170 /*
171 * use the number of parsed entities in the replacement
172 */
173 size = ent->checked;
174
175 /*
176 * The amount of data parsed counting entities size only once
177 */
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
181 }
182 consumed += ctxt->sizeentities;
183
184 /*
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
187 */
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189 return (0);
190 } else {
191 /*
192 * strange we got no data for checking just return
193 */
194 return (0);
195 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 return (1);
198}
199
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to be
 * processed. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000208unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard0fb18932003-09-07 09:14:37 +0000210
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000213#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000214#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000215#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
216
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
226#define XML_PARSER_CHUNK_SIZE 100
227
Owen Taylor3473f882001-02-23 17:55:21 +0000228/*
Owen Taylor3473f882001-02-23 17:55:21 +0000229 * List of XML prefixed PI allowed by W3C specs
230 */
231
Daniel Veillardb44025c2001-10-11 22:55:55 +0000232static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000233 "xml-stylesheet",
Daniel Veillard4c4653e2011-06-05 11:29:29 +0800234 "xml-model",
Owen Taylor3473f882001-02-23 17:55:21 +0000235 NULL
236};
237
Daniel Veillarda07050d2003-10-19 14:46:32 +0000238
Owen Taylor3473f882001-02-23 17:55:21 +0000239/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200240static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000242
Daniel Veillard7d515752003-09-26 19:12:37 +0000243static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000244xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000246 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000247 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000248
Daniel Veillard37334572008-07-31 08:20:02 +0000249static int
250xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000252#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000253static void
254xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000256#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000257
Daniel Veillard7d515752003-09-26 19:12:37 +0000258static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000259xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000261
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000262static int
263xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264
Daniel Veillarde57ec792003-09-10 10:50:59 +0000265/************************************************************************
266 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800267 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 * *
269 ************************************************************************/
270
271/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
276 *
277 * Handle a redefinition of attribute error
278 */
279static void
280xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
282{
Daniel Veillard157fee02003-10-31 10:36:03 +0000283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
285 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000286 if (ctxt != NULL)
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000289 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000294 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000300 if (ctxt != NULL) {
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
304 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305}
306
307/**
308 * xmlFatalErr:
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @extra: extra information string
312 *
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314 */
315static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317{
318 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800319 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320
Daniel Veillard157fee02003-10-31 10:36:03 +0000321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
323 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 switch (error) {
325 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800326 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800329 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800332 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "internal error";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800338 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800341 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800344 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800350 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800353 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800356 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "Fragment not allowed";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
441 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000492 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800495 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 if (info == NULL)
507 snprintf(errstr, 128, "%s\n", errmsg);
508 else
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000510 if (ctxt != NULL)
511 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000514 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL) {
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
519 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520}
521
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000522/**
523 * xmlFatalErrMsg:
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
527 *
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529 */
530static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000533{
Daniel Veillard157fee02003-10-31 10:36:03 +0000534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
536 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000537 if (ctxt != NULL)
538 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000546}
547
548/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000549 * xmlWarningMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 * @str2: extra data
555 *
556 * Handle a warning.
557 */
558static void
559xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000563
Daniel Veillard157fee02003-10-31 10:36:03 +0000564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000569 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200570 if (ctxt != NULL) {
571 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000572 (ctxt->sax) ? ctxt->sax->warning : NULL,
573 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200578 } else {
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
584 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585}
586
587/**
588 * xmlValidityError:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
592 * @str1: extra data
593 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000594 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595 */
596static void
597xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000598 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000599{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000600 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000601
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->errNo = error;
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
609 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200610 if (ctxt != NULL) {
611 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000612 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200618 } else {
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000624 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625}
626
627/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000628 * xmlFatalErrMsgInt:
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
633 *
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635 */
636static void
637xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000638 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000639{
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if (ctxt != NULL)
644 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000645 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000648 if (ctxt != NULL) {
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
652 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000653}
654
655/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: an string info
661 * @val: an integer value
662 * @str2: an string info
663 *
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 */
666static void
667xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800668 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000669 const xmlChar *str2)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000680 if (ctxt != NULL) {
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
684 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000685}
686
687/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000688 * xmlFatalErrMsgStr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
693 *
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 */
696static void
697xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000698 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000699{
Daniel Veillard157fee02003-10-31 10:36:03 +0000700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
702 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
713 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000714}
715
716/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000717 * xmlErrMsgStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
722 *
723 * Handle a non fatal parser error
724 */
725static void
726xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
728{
Daniel Veillard157fee02003-10-31 10:36:03 +0000729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
731 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000732 if (ctxt != NULL)
733 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737 val);
738}
739
740/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000741 * xmlNsErr:
742 * @ctxt: an XML parser context
743 * @error: the error number
744 * @msg: the message
745 * @info1: extra information string
746 * @info2: extra information string
747 *
748 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749 */
750static void
751xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000755{
Daniel Veillard157fee02003-10-31 10:36:03 +0000756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
758 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000759 if (ctxt != NULL)
760 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000765 if (ctxt != NULL)
766 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000767}
768
Daniel Veillard37334572008-07-31 08:20:02 +0000769/**
770 * xmlNsWarn
771 * @ctxt: an XML parser context
772 * @error: the error number
773 * @msg: the message
774 * @info1: extra information string
775 * @info2: extra information string
776 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800777 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000778 */
779static void
780xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const char *msg,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
784{
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
787 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
792}
793
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000794/************************************************************************
795 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800796 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797 * *
798 ************************************************************************/
799
800/**
801 * xmlHasFeature:
802 * @feature: the feature to be examined
803 *
804 * Examines if the library has been compiled with a given feature.
805 *
806 * Returns a non-zero value if the feature exist, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an unknown
808 * unknown feature is requested, non-zero otherwise.
809 */
810int
811xmlHasFeature(xmlFeature feature)
812{
813 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_THREAD_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_TREE_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_OUTPUT_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_PUSH_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_READER_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_PATTERN_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_WRITER_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_SAX1_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_FTP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_HTTP_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_VALID_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_HTML_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_LEGACY_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_C14N_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_CATALOG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_XPATH_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_XPTR_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_XINCLUDE_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_ICONV_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_ISO8859X_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_UNICODE_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_REGEXP_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_AUTOMATA_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_EXPR_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_SCHEMAS_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_SCHEMATRON_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_MODULES_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_DEBUG_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef DEBUG_MEMORY_LOCATION
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_DEBUG_RUNTIME
990 return(1);
991#else
992 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000993#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000994 case XML_WITH_ZLIB:
995#ifdef LIBXML_ZLIB_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001000 case XML_WITH_LZMA:
1001#ifdef LIBXML_LZMA_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 case XML_WITH_ICU:
1007#ifdef LIBXML_ICU_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012 default:
1013 break;
1014 }
1015 return(0);
1016}
1017
1018/************************************************************************
1019 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001020 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 * *
1022 ************************************************************************/
1023
1024/**
1025 * xmlDetectSAX2:
1026 * @ctxt: an XML parser context
1027 *
1028 * Do the SAX2 detection and specific intialization
1029 */
1030static void
1031xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001037#else
1038 ctxt->sax2 = 1;
1039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001046 xmlErrMemory(ctxt, NULL);
1047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048}
1049
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050typedef struct _xmlDefAttrs xmlDefAttrs;
1051typedef xmlDefAttrs *xmlDefAttrsPtr;
1052struct _xmlDefAttrs {
1053 int nbAttrs; /* number of defaulted attributes on that element */
1054 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001055 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057
1058/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001059 * xmlAttrNormalizeSpace:
1060 * @src: the source string
1061 * @dst: the target string
1062 *
1063 * Normalize the space in non CDATA attribute values:
1064 * If the attribute type is not CDATA, then the XML processor MUST further
1065 * process the normalized attribute value by discarding any leading and
1066 * trailing space (#x20) characters, and by replacing sequences of space
1067 * (#x20) characters by a single space (#x20) character.
1068 * Note that the size of dst need to be at least src, and if one doesn't need
1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070 * passing src as dst is just fine.
1071 *
1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073 * is needed.
1074 */
1075static xmlChar *
1076xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077{
1078 if ((src == NULL) || (dst == NULL))
1079 return(NULL);
1080
1081 while (*src == 0x20) src++;
1082 while (*src != 0) {
1083 if (*src == 0x20) {
1084 while (*src == 0x20) src++;
1085 if (*src != 0)
1086 *dst++ = 0x20;
1087 } else {
1088 *dst++ = *src++;
1089 }
1090 }
1091 *dst = 0;
1092 if (dst == src)
1093 return(NULL);
1094 return(dst);
1095}
1096
1097/**
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1100 *
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1104 *
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106 * is needed.
1107 */
1108static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001109xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001110{
1111 int i;
1112 int remove_head = 0;
1113 int need_realloc = 0;
1114 const xmlChar *cur;
1115
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117 return(NULL);
1118 i = *len;
1119 if (i <= 0)
1120 return(NULL);
1121
1122 cur = src;
1123 while (*cur == 0x20) {
1124 cur++;
1125 remove_head++;
1126 }
1127 while (*cur != 0) {
1128 if (*cur == 0x20) {
1129 cur++;
1130 if ((*cur == 0x20) || (*cur == 0)) {
1131 need_realloc = 1;
1132 break;
1133 }
1134 } else
1135 cur++;
1136 }
1137 if (need_realloc) {
1138 xmlChar *ret;
1139
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141 if (ret == NULL) {
1142 xmlErrMemory(ctxt, NULL);
1143 return(NULL);
1144 }
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1147 return(ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 memmove(src, src + remove_head, 1 + *len);
1151 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001152 }
1153 return(NULL);
1154}
1155
1156/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 * xmlAddDefAttrs:
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1162 *
1163 * Add a defaulted attribute for an element
1164 */
1165static void
1166xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1171 int len;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1174
Daniel Veillard6a31b832008-03-26 14:06:44 +00001175 /*
1176 * Allows to detect attribute redefinitions
1177 */
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180 return;
1181 }
1182
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 if (ctxt->attsDefault == NULL)
1186 goto mem_error;
1187 }
1188
1189 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 */
1193 name = xmlSplitQName3(fullname, &len);
1194 if (name == NULL) {
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1196 prefix = NULL;
1197 } else {
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200 }
1201
1202 /*
1203 * make sure there is some storage
1204 */
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 if (defaults == NULL)
1210 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001212 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1215 xmlFree(defaults);
1216 goto mem_error;
1217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001219 xmlDefAttrsPtr temp;
1220
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001223 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1229 xmlFree(defaults);
1230 goto mem_error;
1231 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 }
1233
1234 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001235 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001236 * are within the DTD and hen not associated to namespace names.
1237 */
1238 name = xmlSplitQName3(fullattr, &len);
1239 if (name == NULL) {
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241 prefix = NULL;
1242 } else {
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245 }
1246
Daniel Veillardae0765b2008-07-31 19:54:59 +00001247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254 if (ctxt->external)
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256 else
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 defaults->nbAttrs++;
1259
1260 return;
1261
1262mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001263 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 return;
1265}
1266
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001267/**
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1273 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001275 */
1276static void
1277xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1280 int type)
1281{
1282 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001284 if (ctxt->attsSpecial == NULL)
1285 goto mem_error;
1286 }
1287
Daniel Veillardac4118d2008-01-11 05:27:32 +00001288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289 return;
1290
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001293 return;
1294
1295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001297 return;
1298}
1299
Daniel Veillard4432df22003-09-28 18:58:27 +00001300/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001301 * xmlCleanSpecialAttrCallback:
1302 *
1303 * Removes CDATA attributes from the special attribute table
1304 */
1305static void
1306xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313 }
1314}
1315
1316/**
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1319 *
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1323 */
1324static void
1325xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326{
1327 if (ctxt->attsSpecial == NULL)
1328 return;
1329
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1335 }
1336 return;
1337}
1338
1339/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1342 *
1343 * Checks that the value conforms to the LanguageID production:
1344 *
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1347 *
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1354 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1356 *
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1359 * ["-" script]
1360 * ["-" region]
1361 * *("-" variant)
1362 * *("-" extension)
1363 * ["-" privateuse]
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1369 *
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1372 *
1373 * script = 4ALPHA ; ISO 15924 code
1374 *
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1377 *
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1380 *
1381 * extension = singleton 1*("-" (2*8alphanum))
1382 *
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1386 * / %x41-57 ; A - W
1387 * / %x59-5A ; Y - Z
1388 * / %x61-77 ; a - w
1389 * / %x79-7A ; y - z
1390 *
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1394 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001395 * Returns 1 if correct 0 otherwise
1396 **/
1397int
1398xmlCheckLanguageID(const xmlChar * lang)
1399{
Daniel Veillard60587d62010-11-04 15:16:27 +01001400 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001401
1402 if (cur == NULL)
1403 return (0);
1404 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001405 ((cur[0] == 'I') && (cur[1] == '-')) ||
1406 ((cur[0] == 'x') && (cur[1] == '-')) ||
1407 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001408 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001409 * Still allow IANA code and user code which were coming
1410 * from the previous version of the XML-1.0 specification
1411 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001412 */
1413 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001415 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001417 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001418 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001419 nxt = cur;
1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422 nxt++;
1423 if (nxt - cur >= 4) {
1424 /*
1425 * Reserved
1426 */
1427 if ((nxt - cur > 8) || (nxt[0] != 0))
1428 return(0);
1429 return(1);
1430 }
1431 if (nxt - cur < 2)
1432 return(0);
1433 /* we got an ISO 639 code */
1434 if (nxt[0] == 0)
1435 return(1);
1436 if (nxt[0] != '-')
1437 return(0);
1438
1439 nxt++;
1440 cur = nxt;
1441 /* now we can have extlang or script or region or variant */
1442 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443 goto region_m49;
1444
1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447 nxt++;
1448 if (nxt - cur == 4)
1449 goto script;
1450 if (nxt - cur == 2)
1451 goto region;
1452 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453 goto variant;
1454 if (nxt - cur != 3)
1455 return(0);
1456 /* we parsed an extlang */
1457 if (nxt[0] == 0)
1458 return(1);
1459 if (nxt[0] != '-')
1460 return(0);
1461
1462 nxt++;
1463 cur = nxt;
1464 /* now we can have script or region or variant */
1465 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466 goto region_m49;
1467
1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470 nxt++;
1471 if (nxt - cur == 2)
1472 goto region;
1473 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474 goto variant;
1475 if (nxt - cur != 4)
1476 return(0);
1477 /* we parsed a script */
1478script:
1479 if (nxt[0] == 0)
1480 return(1);
1481 if (nxt[0] != '-')
1482 return(0);
1483
1484 nxt++;
1485 cur = nxt;
1486 /* now we can have region or variant */
1487 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488 goto region_m49;
1489
1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492 nxt++;
1493
1494 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495 goto variant;
1496 if (nxt - cur != 2)
1497 return(0);
1498 /* we parsed a region */
1499region:
1500 if (nxt[0] == 0)
1501 return(1);
1502 if (nxt[0] != '-')
1503 return(0);
1504
1505 nxt++;
1506 cur = nxt;
1507 /* now we can just have a variant */
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511
1512 if ((nxt - cur < 5) || (nxt - cur > 8))
1513 return(0);
1514
1515 /* we parsed a variant */
1516variant:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001522 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001523
1524region_m49:
1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527 nxt += 3;
1528 goto region;
1529 }
1530 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001531}
1532
Owen Taylor3473f882001-02-23 17:55:21 +00001533/************************************************************************
1534 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001535 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001536 * *
1537 ************************************************************************/
1538
Daniel Veillard8ed10722009-08-20 19:17:36 +02001539static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001541
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542#ifdef SAX2
1543/**
1544 * nsPush:
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1548 *
1549 * Pushes a new parser namespace on top of the ns stack
1550 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 */
1554static int
1555nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1558 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001560 if (ctxt->nsTab[i] == prefix) {
1561 /* in scope */
1562 if (ctxt->nsTab[i + 1] == URL)
1563 return(-2);
1564 /* out of scope keep it */
1565 break;
1566 }
1567 }
1568 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570 ctxt->nsMax = 10;
1571 ctxt->nsNr = 0;
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001576 ctxt->nsMax = 0;
1577 return (-1);
1578 }
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001580 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001586 ctxt->nsMax /= 2;
1587 return (-1);
1588 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001589 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 }
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1594}
1595/**
1596 * nsPop:
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1599 *
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1601 *
1602 * Returns the number of namespaces removed
1603 */
1604static int
1605nsPop(xmlParserCtxtPtr ctxt, int nr)
1606{
1607 int i;
1608
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612 nr = ctxt->nsNr;
1613 }
1614 if (ctxt->nsNr <= 0)
1615 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001616
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 for (i = 0;i < nr;i++) {
1618 ctxt->nsNr--;
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1620 }
1621 return(nr);
1622}
1623#endif
1624
1625static int
1626xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001628 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629 int maxatts;
1630
1631 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001645 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001646 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001651 ctxt->maxatts = maxatts;
1652 }
1653 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657}
1658
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659/**
1660 * inputPush:
1661 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001662 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 *
1664 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001666 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001667 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001668int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001671 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001672 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 if (ctxt->inputNr >= ctxt->inputMax) {
1674 ctxt->inputMax *= 2;
1675 ctxt->inputTab =
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677 ctxt->inputMax *
1678 sizeof(ctxt->inputTab[0]));
1679 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
1683 value = NULL;
1684 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001685 }
1686 }
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1690}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001693 * @ctxt: an XML parser context
1694 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001696 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001699xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700inputPop(xmlParserCtxtPtr ctxt)
1701{
1702 xmlParserInputPtr ret;
1703
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001704 if (ctxt == NULL)
1705 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001707 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 ctxt->inputNr--;
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711 else
1712 ctxt->input = NULL;
1713 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001714 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 return (ret);
1716}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001717/**
1718 * nodePush:
1719 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001720 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 *
1722 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001723 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001724 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001729 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001731 xmlNodePtr *tmp;
1732
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001736 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001738 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001739 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001740 ctxt->nodeTab = tmp;
1741 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001742 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001747 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001750 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001751 ctxt->nodeTab[ctxt->nodeNr] = value;
1752 ctxt->node = value;
1753 return (ctxt->nodeNr++);
1754}
Daniel Veillard8915c152008-08-26 13:05:34 +00001755
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756/**
1757 * nodePop:
1758 * @ctxt: an XML parser context
1759 *
1760 * Pops the top element node from the node stack
1761 *
1762 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001763 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001764xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765nodePop(xmlParserCtxtPtr ctxt)
1766{
1767 xmlNodePtr ret;
1768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001771 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 ctxt->nodeNr--;
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775 else
1776 ctxt->node = NULL;
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 return (ret);
1780}
Daniel Veillarda2351322004-06-27 12:08:10 +00001781
1782#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 * nameNsPush:
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1789 *
1790 * Pushes a new element name/prefix/URL on top of the name stack
1791 *
1792 * Returns -1 in case of error, the index in the stack otherwise
1793 */
1794static int
1795nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797{
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1800 void **tmp2;
1801 ctxt->nameMax *= 2;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803 ctxt->nameMax *
1804 sizeof(ctxt->nameTab[0]));
1805 if (tmp == NULL) {
1806 ctxt->nameMax /= 2;
1807 goto mem_error;
1808 }
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811 ctxt->nameMax * 3 *
1812 sizeof(ctxt->pushTab[0]));
1813 if (tmp2 == NULL) {
1814 ctxt->nameMax /= 2;
1815 goto mem_error;
1816 }
1817 ctxt->pushTab = tmp2;
1818 }
1819 ctxt->nameTab[ctxt->nameNr] = value;
1820 ctxt->name = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 return (ctxt->nameNr++);
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 return (-1);
1828}
1829/**
1830 * nameNsPop:
1831 * @ctxt: an XML parser context
1832 *
1833 * Pops the top element/prefix/URI name from the name stack
1834 *
1835 * Returns the name just removed
1836 */
1837static const xmlChar *
1838nameNsPop(xmlParserCtxtPtr ctxt)
1839{
1840 const xmlChar *ret;
1841
1842 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001843 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844 ctxt->nameNr--;
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847 else
1848 ctxt->name = NULL;
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1851 return (ret);
1852}
Daniel Veillarda2351322004-06-27 12:08:10 +00001853#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854
1855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001856 * namePush:
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1859 *
1860 * Pushes a new element name on top of the name stack
1861 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001865namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001867 if (ctxt == NULL) return (-1);
1868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001872 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001873 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001878 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001879 }
1880 ctxt->nameTab[ctxt->nameNr] = value;
1881 ctxt->name = value;
1882 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001884 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886}
1887/**
1888 * namePop:
1889 * @ctxt: an XML parser context
1890 *
1891 * Pops the top element name from the name stack
1892 *
1893 * Returns the name just removed
1894 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001895const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896namePop(xmlParserCtxtPtr ctxt)
1897{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001898 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001899
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 ctxt->nameNr--;
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905 else
1906 ctxt->name = NULL;
1907 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001908 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 return (ret);
1910}
Owen Taylor3473f882001-02-23 17:55:21 +00001911
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001912static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001914 int *tmp;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001921 ctxt->spaceMax /=2;
1922 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001924 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1929}
1930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001931static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 int ret;
1933 if (ctxt->spaceNr <= 0) return(0);
1934 ctxt->spaceNr--;
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001938 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1941 return(ret);
1942}
1943
1944/*
1945 * Macros for accessing the content. Those should be used only by the parser,
1946 * and not exported.
1947 *
1948 * Dirty macros, i.e. one often need to make assumption on the context to
1949 * use them
1950 *
1951 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1952 * To be used with extreme caution since operations consuming
1953 * characters may move the input buffer to a different location !
1954 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1955 * This should be used internally by the parser
1956 * only to compare to ASCII values otherwise it would break when
1957 * running with UTF-8 encoding.
1958 * RAW same as CUR but in the input buffer, bypass any token
1959 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1962 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001963 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001966 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1967 *
1968 * NEXT Skip to the next character, this does the proper decoding
1969 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001970 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001971 * CUR_CHAR(l) returns the current unicode character (int), set l
1972 * to the number of xmlChars used for the encoding [0-5].
1973 * CUR_SCHAR same but operate on a string instead of the context
1974 * COPY_BUF copy the current unicode char to the target buffer, increment
1975 * the index
1976 * GROW, SHRINK handling of input buffers
1977 */
1978
/*
 * All the macros below expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 *
 * RAW and CUR expand to the same expression: the byte at the current
 * input position. NXT(val) is the byte val positions further, without
 * any bounds check. CUR_PTR is the current input pointer itself.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are equal
 * to c1 ... cn, compared as unsigned char. The argument s is not
 * parenthesized in the expansion and is evaluated once per byte, so it
 * should be a plain pointer expression without side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * SKIP(val): advances the input by val bytes, adding val to both
 * ctxt->nbChars and the column counter. The line counter is not touched,
 * so the skipped bytes must not contain a newline. Afterwards a '%' at
 * the new position triggers xmlParserHandlePEReference(), and a 0 byte
 * with no further data obtained from xmlParserInputGrow() pops the input.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val bytes one at a time so that
 * a '\n' increments the line counter and resets the column to 1. Note
 * that val is used unparenthesized in the loop condition.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2024
/*
 * SHRINK: when the parser is not in progressive mode, calls xmlSHRINK()
 * once more than 2 * INPUT_CHUNK bytes lie before the current position
 * and fewer than 2 * INPUT_CHUNK bytes remain after it.
 * The expansion is a complete if statement which already ends with ';',
 * so it must not be used as the unbraced body of an if that has an else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2029
2030static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 }
Owen Taylor3473f882001-02-23 17:55:21 +00002036
/*
 * GROW: when the parser is not in progressive mode and fewer than
 * INPUT_CHUNK bytes remain after the current position, calls xmlGROW().
 * The expansion is a complete if statement which already ends with ';',
 * so it must not be used as the unbraced body of an if that has an else.
 */
#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlGROW (ctxt);
2040
2041static void xmlGROW (xmlParserCtxtPtr ctxt) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002042 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002044 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002045 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002047 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002048 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002049 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002050 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002051 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2052 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002053}
Owen Taylor3473f882001-02-23 17:55:21 +00002054
/* Skips blank characters at the current position, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Moves to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advances by exactly one byte, incrementing the column counter
 * and ctxt->nbChars; the line counter is not touched. When the new
 * position holds a 0 byte the input is grown, but never popped.
 * The expansion is a bare brace block, not a do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advances by l bytes, counted as one character: a '\n' at the
 * current position increments the line counter and resets the column,
 * anything else adds 1 to the column. ctxt->nbChars is not updated.
 * A '%' at the new position triggers xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the input; its length in bytes is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
/* Same, reading from the string s instead of the parser input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): appends the character v to buffer b at index i and
 * advances i: a direct byte store when l is 1, xmlCopyCharMultiByte()
 * otherwise. The expansion is an unbraced if/else without a trailing
 * ';', so it needs braces around it when used inside another if.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00002081
2082/**
2083 * xmlSkipBlankChars:
2084 * @ctxt: the XML parser context
2085 *
2086 * skip all blanks character found at that point in the input streams.
2087 * It pops up finished entities in the process if allowable at that point.
2088 *
2089 * Returns the number of space chars skipped
2090 */
2091
2092int
2093xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002094 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002095
2096 /*
2097 * It's Okay to use CUR/NEXT here since all the blanks are on
2098 * the ASCII range.
2099 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002100 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2101 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002102 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002103 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002104 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002105 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002106 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002107 if (*cur == '\n') {
2108 ctxt->input->line++; ctxt->input->col = 1;
2109 }
2110 cur++;
2111 res++;
2112 if (*cur == 0) {
2113 ctxt->input->cur = cur;
2114 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115 cur = ctxt->input->cur;
2116 }
2117 }
2118 ctxt->input->cur = cur;
2119 } else {
2120 int cur;
2121 do {
2122 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002123 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002124 NEXT;
2125 cur = CUR;
2126 res++;
2127 }
2128 while ((cur == 0) && (ctxt->inputNr > 1) &&
2129 (ctxt->instate != XML_PARSER_COMMENT)) {
2130 xmlPopInput(ctxt);
2131 cur = CUR;
2132 }
2133 /*
2134 * Need to handle support of entities branching here
2135 */
2136 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2138 }
Owen Taylor3473f882001-02-23 17:55:21 +00002139 return(res);
2140}
2141
2142/************************************************************************
2143 * *
2144 * Commodity functions to handle entities *
2145 * *
2146 ************************************************************************/
2147
2148/**
2149 * xmlPopInput:
2150 * @ctxt: an XML parser context
2151 *
2152 * xmlPopInput: the current input pointed by ctxt->input came to an end
2153 * pop it and return the next char.
2154 *
2155 * Returns the current xmlChar in the parser context
2156 */
2157xmlChar
2158xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002159 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 if (xmlParserDebugEntities)
2161 xmlGenericError(xmlGenericErrorContext,
2162 "Popping input %d\n", ctxt->inputNr);
2163 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002164 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002165 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166 return(xmlPopInput(ctxt));
2167 return(CUR);
2168}
2169
2170/**
2171 * xmlPushInput:
2172 * @ctxt: an XML parser context
2173 * @input: an XML parser input fragment (entity, XML fragment ...).
2174 *
2175 * xmlPushInput: switch to a new input stream which is stacked on top
2176 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002177 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002178 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002179int
Owen Taylor3473f882001-02-23 17:55:21 +00002180xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002181 int ret;
2182 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002183
2184 if (xmlParserDebugEntities) {
2185 if ((ctxt->input != NULL) && (ctxt->input->filename))
2186 xmlGenericError(xmlGenericErrorContext,
2187 "%s(%d): ", ctxt->input->filename,
2188 ctxt->input->line);
2189 xmlGenericError(xmlGenericErrorContext,
2190 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2191 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002192 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002193 if (ctxt->instate == XML_PARSER_EOF)
2194 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002195 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002196 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002197}
2198
2199/**
2200 * xmlParseCharRef:
2201 * @ctxt: an XML parser context
2202 *
2203 * parse Reference declarations
2204 *
2205 * [66] CharRef ::= '&#' [0-9]+ ';' |
2206 * '&#x' [0-9a-fA-F]+ ';'
2207 *
2208 * [ WFC: Legal Character ]
2209 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002210 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002211 *
2212 * Returns the value parsed (as an int), 0 in case of error
2213 */
2214int
2215xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002216 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002217 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002218 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002219
Owen Taylor3473f882001-02-23 17:55:21 +00002220 /*
2221 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2222 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002223 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002224 (NXT(2) == 'x')) {
2225 SKIP(3);
2226 GROW;
2227 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002228 if (count++ > 20) {
2229 count = 0;
2230 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002231 if (ctxt->instate == XML_PARSER_EOF)
2232 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002233 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002234 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002235 val = val * 16 + (CUR - '0');
2236 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237 val = val * 16 + (CUR - 'a') + 10;
2238 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239 val = val * 16 + (CUR - 'A') + 10;
2240 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002241 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 val = 0;
2243 break;
2244 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002245 if (val > 0x10FFFF)
2246 outofrange = val;
2247
Owen Taylor3473f882001-02-23 17:55:21 +00002248 NEXT;
2249 count++;
2250 }
2251 if (RAW == ';') {
2252 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002253 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002254 ctxt->nbChars ++;
2255 ctxt->input->cur++;
2256 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002257 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002258 SKIP(2);
2259 GROW;
2260 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002261 if (count++ > 20) {
2262 count = 0;
2263 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002264 if (ctxt->instate == XML_PARSER_EOF)
2265 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002266 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002267 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002268 val = val * 10 + (CUR - '0');
2269 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002271 val = 0;
2272 break;
2273 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002274 if (val > 0x10FFFF)
2275 outofrange = val;
2276
Owen Taylor3473f882001-02-23 17:55:21 +00002277 NEXT;
2278 count++;
2279 }
2280 if (RAW == ';') {
2281 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002282 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002283 ctxt->nbChars ++;
2284 ctxt->input->cur++;
2285 }
2286 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002287 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002288 }
2289
2290 /*
2291 * [ WFC: Legal Character ]
2292 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002293 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002294 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002295 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return(val);
2297 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002298 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299 "xmlParseCharRef: invalid xmlChar value %d\n",
2300 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002301 }
2302 return(0);
2303}
2304
2305/**
2306 * xmlParseStringCharRef:
2307 * @ctxt: an XML parser context
2308 * @str: a pointer to an index in the string
2309 *
2310 * parse Reference declarations, variant parsing from a string rather
2311 * than an an input flow.
2312 *
2313 * [66] CharRef ::= '&#' [0-9]+ ';' |
2314 * '&#x' [0-9a-fA-F]+ ';'
2315 *
2316 * [ WFC: Legal Character ]
2317 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002318 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002319 *
2320 * Returns the value parsed (as an int), 0 in case of error, str will be
2321 * updated to the current value of the index
2322 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002323static int
Owen Taylor3473f882001-02-23 17:55:21 +00002324xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2325 const xmlChar *ptr;
2326 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002327 unsigned int val = 0;
2328 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002329
2330 if ((str == NULL) || (*str == NULL)) return(0);
2331 ptr = *str;
2332 cur = *ptr;
2333 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2334 ptr += 3;
2335 cur = *ptr;
2336 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002337 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002338 val = val * 16 + (cur - '0');
2339 else if ((cur >= 'a') && (cur <= 'f'))
2340 val = val * 16 + (cur - 'a') + 10;
2341 else if ((cur >= 'A') && (cur <= 'F'))
2342 val = val * 16 + (cur - 'A') + 10;
2343 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002344 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002345 val = 0;
2346 break;
2347 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002348 if (val > 0x10FFFF)
2349 outofrange = val;
2350
Owen Taylor3473f882001-02-23 17:55:21 +00002351 ptr++;
2352 cur = *ptr;
2353 }
2354 if (cur == ';')
2355 ptr++;
2356 } else if ((cur == '&') && (ptr[1] == '#')){
2357 ptr += 2;
2358 cur = *ptr;
2359 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002360 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002361 val = val * 10 + (cur - '0');
2362 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002363 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002364 val = 0;
2365 break;
2366 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002367 if (val > 0x10FFFF)
2368 outofrange = val;
2369
Owen Taylor3473f882001-02-23 17:55:21 +00002370 ptr++;
2371 cur = *ptr;
2372 }
2373 if (cur == ';')
2374 ptr++;
2375 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002376 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002377 return(0);
2378 }
2379 *str = ptr;
2380
2381 /*
2382 * [ WFC: Legal Character ]
2383 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002384 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002385 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002386 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002387 return(val);
2388 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002389 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2391 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002392 }
2393 return(0);
2394}
2395
2396/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002397 * xmlNewBlanksWrapperInputStream:
2398 * @ctxt: an XML parser context
2399 * @entity: an Entity pointer
2400 *
2401 * Create a new input stream for wrapping
2402 * blanks around a PEReference
2403 *
2404 * Returns the new input stream or NULL
2405 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002406
Daniel Veillardf5582f12002-06-11 10:08:16 +00002407static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002408
Daniel Veillardf4862f02002-09-10 11:13:43 +00002409static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002410xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411 xmlParserInputPtr input;
2412 xmlChar *buffer;
2413 size_t length;
2414 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002417 return(NULL);
2418 }
2419 if (xmlParserDebugEntities)
2420 xmlGenericError(xmlGenericErrorContext,
2421 "new blanks wrapper for entity: %s\n", entity->name);
2422 input = xmlNewInputStream(ctxt);
2423 if (input == NULL) {
2424 return(NULL);
2425 }
2426 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002427 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002428 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002429 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002430 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002431 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002432 }
2433 buffer [0] = ' ';
2434 buffer [1] = '%';
2435 buffer [length-3] = ';';
2436 buffer [length-2] = ' ';
2437 buffer [length-1] = 0;
2438 memcpy(buffer + 2, entity->name, length - 5);
2439 input->free = deallocblankswrapper;
2440 input->base = buffer;
2441 input->cur = buffer;
2442 input->length = length;
2443 input->end = &buffer[length];
2444 return(input);
2445}
2446
2447/**
Owen Taylor3473f882001-02-23 17:55:21 +00002448 * xmlParserHandlePEReference:
2449 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002450 *
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * [69] PEReference ::= '%' Name ';'
2452 *
2453 * [ WFC: No Recursion ]
2454 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002455 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002456 *
2457 * [ WFC: Entity Declared ]
2458 * In a document without any DTD, a document with only an internal DTD
2459 * subset which contains no parameter entity references, or a document
2460 * with "standalone='yes'", ... ... The declaration of a parameter
2461 * entity must precede any reference to it...
2462 *
2463 * [ VC: Entity Declared ]
2464 * In a document with an external subset or external parameter entities
2465 * with "standalone='no'", ... ... The declaration of a parameter entity
2466 * must precede any reference to it...
2467 *
2468 * [ WFC: In DTD ]
2469 * Parameter-entity references may only appear in the DTD.
2470 * NOTE: misleading but this is handled.
2471 *
2472 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002473 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002474 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002475 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002476 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002477 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002478 */
2479void
2480xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002481 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002482 xmlEntityPtr entity = NULL;
2483 xmlParserInputPtr input;
2484
Owen Taylor3473f882001-02-23 17:55:21 +00002485 if (RAW != '%') return;
2486 switch(ctxt->instate) {
2487 case XML_PARSER_CDATA_SECTION:
2488 return;
2489 case XML_PARSER_COMMENT:
2490 return;
2491 case XML_PARSER_START_TAG:
2492 return;
2493 case XML_PARSER_END_TAG:
2494 return;
2495 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002496 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 return;
2498 case XML_PARSER_PROLOG:
2499 case XML_PARSER_START:
2500 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002501 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002502 return;
2503 case XML_PARSER_ENTITY_DECL:
2504 case XML_PARSER_CONTENT:
2505 case XML_PARSER_ATTRIBUTE_VALUE:
2506 case XML_PARSER_PI:
2507 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002508 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002509 /* we just ignore it there */
2510 return;
2511 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002512 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002513 return;
2514 case XML_PARSER_ENTITY_VALUE:
2515 /*
2516 * NOTE: in the case of entity values, we don't do the
2517 * substitution here since we need the literal
2518 * entity value to be able to save the internal
2519 * subset of the document.
2520 * This will be handled by xmlStringDecodeEntities
2521 */
2522 return;
2523 case XML_PARSER_DTD:
2524 /*
2525 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526 * In the internal DTD subset, parameter-entity references
2527 * can occur only where markup declarations can occur, not
2528 * within markup declarations.
2529 * In that case this is handled in xmlParseMarkupDecl
2530 */
2531 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2532 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002533 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002534 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002535 break;
2536 case XML_PARSER_IGNORE:
2537 return;
2538 }
2539
2540 NEXT;
2541 name = xmlParseName(ctxt);
2542 if (xmlParserDebugEntities)
2543 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002544 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002545 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002546 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002547 } else {
2548 if (RAW == ';') {
2549 NEXT;
2550 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2552 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002553
Owen Taylor3473f882001-02-23 17:55:21 +00002554 /*
2555 * [ WFC: Entity Declared ]
2556 * In a document without any DTD, a document with only an
2557 * internal DTD subset which contains no parameter entity
2558 * references, or a document with "standalone='yes'", ...
2559 * ... The declaration of a parameter entity must precede
2560 * any reference to it...
2561 */
2562 if ((ctxt->standalone == 1) ||
2563 ((ctxt->hasExternalSubset == 0) &&
2564 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002565 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002566 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002567 } else {
2568 /*
2569 * [ VC: Entity Declared ]
2570 * In a document with an external subset or external
2571 * parameter entities with "standalone='no'", ...
2572 * ... The declaration of a parameter entity must precede
2573 * any reference to it...
2574 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002575 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2576 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2577 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002578 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002579 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002580 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2581 "PEReference: %%%s; not found\n",
2582 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002583 ctxt->valid = 0;
2584 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002585 } else if (ctxt->input->free != deallocblankswrapper) {
2586 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002587 if (xmlPushInput(ctxt, input) < 0)
2588 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002589 } else {
2590 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2591 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002592 xmlChar start[4];
2593 xmlCharEncoding enc;
2594
Owen Taylor3473f882001-02-23 17:55:21 +00002595 /*
2596 * handle the extra spaces added before and after
2597 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002598 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002599 */
2600 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002601 if (xmlPushInput(ctxt, input) < 0)
2602 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002603
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002604 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002605 * Get the 4 first bytes and decode the charset
2606 * if enc != XML_CHAR_ENCODING_NONE
2607 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002608 * Note that, since we may have some non-UTF8
2609 * encoding (like UTF16, bug 135229), the 'length'
2610 * is not known, but we can calculate based upon
2611 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002612 */
2613 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002614 if (ctxt->instate == XML_PARSER_EOF)
2615 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002616 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002617 start[0] = RAW;
2618 start[1] = NXT(1);
2619 start[2] = NXT(2);
2620 start[3] = NXT(3);
2621 enc = xmlDetectCharEncoding(start, 4);
2622 if (enc != XML_CHAR_ENCODING_NONE) {
2623 xmlSwitchEncoding(ctxt, enc);
2624 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002625 }
2626
Owen Taylor3473f882001-02-23 17:55:21 +00002627 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002628 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2629 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002630 xmlParseTextDecl(ctxt);
2631 }
Owen Taylor3473f882001-02-23 17:55:21 +00002632 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002633 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2634 "PEReference: %s is not a parameter entity\n",
2635 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002636 }
2637 }
2638 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002639 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002640 }
Owen Taylor3473f882001-02-23 17:55:21 +00002641 }
2642}
2643
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current size plus n bytes. n is parenthesized
 * so that any expression can be passed safely. On size_t wrap-around or
 * reallocation failure control jumps to the caller's mem_error label and
 * the original buffer is left untouched.
 * The expansion is a plain brace block, not a single statement.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2658
2659/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002660 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002661 * @ctxt: the parser context
2662 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002663 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002664 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2665 * @end: an end marker xmlChar, 0 if none
2666 * @end2: an end marker xmlChar, 0 if none
2667 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002668 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002669 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002670 *
2671 * [67] Reference ::= EntityRef | CharRef
2672 *
2673 * [69] PEReference ::= '%' Name ';'
2674 *
2675 * Returns A newly allocated string with the substitution done. The caller
2676 * must deallocate it !
2677 */
2678xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002679xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2680 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002681 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002682 size_t buffer_size = 0;
2683 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002684
2685 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002686 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002687 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002688 xmlEntityPtr ent;
2689 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002690
Daniel Veillarda82b1822004-11-08 16:24:57 +00002691 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002692 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002693 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002694
Daniel Veillard0161e632008-08-28 15:36:32 +00002695 if (((ctxt->depth > 40) &&
2696 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2697 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002698 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002699 return(NULL);
2700 }
2701
2702 /*
2703 * allocate a translation buffer.
2704 */
2705 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002706 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002707 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002708
2709 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002710 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002711 * we are operating on already parsed values.
2712 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002713 if (str < last)
2714 c = CUR_SCHAR(str, l);
2715 else
2716 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002717 while ((c != 0) && (c != end) && /* non input consuming loop */
2718 (c != end2) && (c != end3)) {
2719
2720 if (c == 0) break;
2721 if ((c == '&') && (str[1] == '#')) {
2722 int val = xmlParseStringCharRef(ctxt, &str);
2723 if (val != 0) {
2724 COPY_BUF(0,buffer,nbchars,val);
2725 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002726 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002727 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002728 }
Owen Taylor3473f882001-02-23 17:55:21 +00002729 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2730 if (xmlParserDebugEntities)
2731 xmlGenericError(xmlGenericErrorContext,
2732 "String decoding Entity Reference: %.30s\n",
2733 str);
2734 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002735 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2736 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002737 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002738 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002739 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002740 if ((ent != NULL) &&
2741 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2742 if (ent->content != NULL) {
2743 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002744 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002745 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002746 }
Owen Taylor3473f882001-02-23 17:55:21 +00002747 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002748 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2749 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002750 }
2751 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002752 ctxt->depth++;
2753 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2754 0, 0, 0);
2755 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002756
Owen Taylor3473f882001-02-23 17:55:21 +00002757 if (rep != NULL) {
2758 current = rep;
2759 while (*current != 0) { /* non input consuming loop */
2760 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002761 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002762 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002763 goto int_error;
2764 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002765 }
2766 }
2767 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002768 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002769 }
2770 } else if (ent != NULL) {
2771 int i = xmlStrlen(ent->name);
2772 const xmlChar *cur = ent->name;
2773
2774 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002775 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002776 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002777 }
2778 for (;i > 0;i--)
2779 buffer[nbchars++] = *cur++;
2780 buffer[nbchars++] = ';';
2781 }
2782 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2783 if (xmlParserDebugEntities)
2784 xmlGenericError(xmlGenericErrorContext,
2785 "String decoding PE Reference: %.30s\n", str);
2786 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002787 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2788 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002789 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002790 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002791 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002792 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002793 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794 }
Owen Taylor3473f882001-02-23 17:55:21 +00002795 ctxt->depth++;
2796 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2797 0, 0, 0);
2798 ctxt->depth--;
2799 if (rep != NULL) {
2800 current = rep;
2801 while (*current != 0) { /* non input consuming loop */
2802 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002803 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002804 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002805 goto int_error;
2806 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002807 }
2808 }
2809 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002810 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002811 }
2812 }
2813 } else {
2814 COPY_BUF(l,buffer,nbchars,c);
2815 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002816 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2817 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002818 }
2819 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002820 if (str < last)
2821 c = CUR_SCHAR(str, l);
2822 else
2823 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002824 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002825 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002826 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002827
2828mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002829 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002830int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002831 if (rep != NULL)
2832 xmlFree(rep);
2833 if (buffer != NULL)
2834 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002835 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002836}
2837
Daniel Veillarde57ec792003-09-10 10:50:59 +00002838/**
2839 * xmlStringDecodeEntities:
2840 * @ctxt: the parser context
2841 * @str: the input string
2842 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2843 * @end: an end marker xmlChar, 0 if none
2844 * @end2: an end marker xmlChar, 0 if none
2845 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002846 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002847 * Takes a entity string content and process to do the adequate substitutions.
2848 *
2849 * [67] Reference ::= EntityRef | CharRef
2850 *
2851 * [69] PEReference ::= '%' Name ';'
2852 *
2853 * Returns A newly allocated string with the substitution done. The caller
2854 * must deallocate it !
2855 */
2856xmlChar *
2857xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2858 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002859 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002860 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2861 end, end2, end3));
2862}
Owen Taylor3473f882001-02-23 17:55:21 +00002863
2864/************************************************************************
2865 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002866 * Commodity functions, cleanup needed ? *
2867 * *
2868 ************************************************************************/
2869
2870/**
2871 * areBlanks:
2872 * @ctxt: an XML parser context
2873 * @str: a xmlChar *
2874 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002875 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002876 *
2877 * Is this a sequence of blank chars that one can ignore ?
2878 *
2879 * Returns 1 if ignorable 0 otherwise.
2880 */
2881
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002882static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2883 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002884 int i, ret;
2885 xmlNodePtr lastChild;
2886
Daniel Veillard05c13a22001-09-09 08:38:09 +00002887 /*
2888 * Don't spend time trying to differentiate them, the same callback is
2889 * used !
2890 */
2891 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002892 return(0);
2893
Owen Taylor3473f882001-02-23 17:55:21 +00002894 /*
2895 * Check for xml:space value.
2896 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002897 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2898 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002899 return(0);
2900
2901 /*
2902 * Check that the string is made of blanks
2903 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002904 if (blank_chars == 0) {
2905 for (i = 0;i < len;i++)
2906 if (!(IS_BLANK_CH(str[i]))) return(0);
2907 }
Owen Taylor3473f882001-02-23 17:55:21 +00002908
2909 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002910 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002911 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002912 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002913 if (ctxt->myDoc != NULL) {
2914 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2915 if (ret == 0) return(1);
2916 if (ret == 1) return(0);
2917 }
2918
2919 /*
2920 * Otherwise, heuristic :-\
2921 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002922 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002923 if ((ctxt->node->children == NULL) &&
2924 (RAW == '<') && (NXT(1) == '/')) return(0);
2925
2926 lastChild = xmlGetLastChild(ctxt->node);
2927 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002928 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2929 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002930 } else if (xmlNodeIsText(lastChild))
2931 return(0);
2932 else if ((ctxt->node->children != NULL) &&
2933 (xmlNodeIsText(ctxt->node->children)))
2934 return(0);
2935 return(1);
2936}
2937
Owen Taylor3473f882001-02-23 17:55:21 +00002938/************************************************************************
2939 * *
2940 * Extra stuff for namespace support *
2941 * Relates to http://www.w3.org/TR/WD-xml-names *
2942 * *
2943 ************************************************************************/
2944
2945/**
2946 * xmlSplitQName:
2947 * @ctxt: an XML parser context
2948 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002949 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002950 *
2951 * parse an UTF8 encoded XML qualified name string
2952 *
2953 * [NS 5] QName ::= (Prefix ':')? LocalPart
2954 *
2955 * [NS 6] Prefix ::= NCName
2956 *
2957 * [NS 7] LocalPart ::= NCName
2958 *
2959 * Returns the local part, and prefix is updated
2960 * to get the Prefix if any.
2961 */
2962
2963xmlChar *
2964xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2965 xmlChar buf[XML_MAX_NAMELEN + 5];
2966 xmlChar *buffer = NULL;
2967 int len = 0;
2968 int max = XML_MAX_NAMELEN;
2969 xmlChar *ret = NULL;
2970 const xmlChar *cur = name;
2971 int c;
2972
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002973 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002974 *prefix = NULL;
2975
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002976 if (cur == NULL) return(NULL);
2977
Owen Taylor3473f882001-02-23 17:55:21 +00002978#ifndef XML_XML_NAMESPACE
2979 /* xml: prefix is not really a namespace */
2980 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2981 (cur[2] == 'l') && (cur[3] == ':'))
2982 return(xmlStrdup(name));
2983#endif
2984
Daniel Veillard597bc482003-07-24 16:08:28 +00002985 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002986 if (cur[0] == ':')
2987 return(xmlStrdup(name));
2988
2989 c = *cur++;
2990 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2991 buf[len++] = c;
2992 c = *cur++;
2993 }
2994 if (len >= max) {
2995 /*
2996 * Okay someone managed to make a huge name, so he's ready to pay
2997 * for the processing speed.
2998 */
2999 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003000
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003001 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003002 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003003 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003004 return(NULL);
3005 }
3006 memcpy(buffer, buf, len);
3007 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3008 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003009 xmlChar *tmp;
3010
Owen Taylor3473f882001-02-23 17:55:21 +00003011 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003012 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003013 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003014 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003015 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003016 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003017 return(NULL);
3018 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003020 }
3021 buffer[len++] = c;
3022 c = *cur++;
3023 }
3024 buffer[len] = 0;
3025 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003026
Daniel Veillard597bc482003-07-24 16:08:28 +00003027 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003028 if (buffer != NULL)
3029 xmlFree(buffer);
3030 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003031 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003032 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003033
Owen Taylor3473f882001-02-23 17:55:21 +00003034 if (buffer == NULL)
3035 ret = xmlStrndup(buf, len);
3036 else {
3037 ret = buffer;
3038 buffer = NULL;
3039 max = XML_MAX_NAMELEN;
3040 }
3041
3042
3043 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003044 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003045 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003046 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003047 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003048 }
Owen Taylor3473f882001-02-23 17:55:21 +00003049 len = 0;
3050
Daniel Veillardbb284f42002-10-16 18:02:47 +00003051 /*
3052 * Check that the first character is proper to start
3053 * a new name
3054 */
3055 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3056 ((c >= 0x41) && (c <= 0x5A)) ||
3057 (c == '_') || (c == ':'))) {
3058 int l;
3059 int first = CUR_SCHAR(cur, l);
3060
3061 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003062 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003063 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003064 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003065 }
3066 }
3067 cur++;
3068
Owen Taylor3473f882001-02-23 17:55:21 +00003069 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3070 buf[len++] = c;
3071 c = *cur++;
3072 }
3073 if (len >= max) {
3074 /*
3075 * Okay someone managed to make a huge name, so he's ready to pay
3076 * for the processing speed.
3077 */
3078 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003079
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003080 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003081 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003082 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003083 return(NULL);
3084 }
3085 memcpy(buffer, buf, len);
3086 while (c != 0) { /* tested bigname2.xml */
3087 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003088 xmlChar *tmp;
3089
Owen Taylor3473f882001-02-23 17:55:21 +00003090 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003091 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003092 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003093 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003094 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003095 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003096 return(NULL);
3097 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003098 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003099 }
3100 buffer[len++] = c;
3101 c = *cur++;
3102 }
3103 buffer[len] = 0;
3104 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003105
Owen Taylor3473f882001-02-23 17:55:21 +00003106 if (buffer == NULL)
3107 ret = xmlStrndup(buf, len);
3108 else {
3109 ret = buffer;
3110 }
3111 }
3112
3113 return(ret);
3114}
3115
3116/************************************************************************
3117 * *
3118 * The parser itself *
3119 * Relates to http://www.w3.org/TR/REC-xml *
3120 * *
3121 ************************************************************************/
3122
Daniel Veillard34e3f642008-07-29 09:02:27 +00003123/************************************************************************
3124 * *
3125 * Routines to parse Name, NCName and NmToken *
3126 * *
3127 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, one per name parsing routine. They only exist in DEBUG
 * builds and start at zero like any object with static storage.
 */
static unsigned long nbParseName;
static unsigned long nbParseNameComplex;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseStringName;
static unsigned long nbParseNmToken;
#endif
3136
Daniel Veillard34e3f642008-07-29 09:02:27 +00003137/*
3138 * The two following functions are related to the change of accepted
3139 * characters for Name and NmToken in the Revision 5 of XML-1.0
3140 * They correspond to the modified production [4] and the new production [4a]
3141 * changes in that revision. Also note that the macros used for the
3142 * productions Letter, Digit, CombiningChar and Extender are not needed
3143 * anymore.
3144 * We still keep compatibility to pre-revision5 parsing semantic if the
3145 * new XML_PARSE_OLD10 option is given to the parser.
3146 */
3147static int
3148xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3149 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3150 /*
3151 * Use the new checks of production [4] [4a] amd [5] of the
3152 * Update 5 of XML-1.0
3153 */
3154 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3155 (((c >= 'a') && (c <= 'z')) ||
3156 ((c >= 'A') && (c <= 'Z')) ||
3157 (c == '_') || (c == ':') ||
3158 ((c >= 0xC0) && (c <= 0xD6)) ||
3159 ((c >= 0xD8) && (c <= 0xF6)) ||
3160 ((c >= 0xF8) && (c <= 0x2FF)) ||
3161 ((c >= 0x370) && (c <= 0x37D)) ||
3162 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3163 ((c >= 0x200C) && (c <= 0x200D)) ||
3164 ((c >= 0x2070) && (c <= 0x218F)) ||
3165 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3166 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3167 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3168 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3169 ((c >= 0x10000) && (c <= 0xEFFFF))))
3170 return(1);
3171 } else {
3172 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3173 return(1);
3174 }
3175 return(0);
3176}
3177
3178static int
3179xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3180 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3181 /*
3182 * Use the new checks of production [4] [4a] amd [5] of the
3183 * Update 5 of XML-1.0
3184 */
3185 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3186 (((c >= 'a') && (c <= 'z')) ||
3187 ((c >= 'A') && (c <= 'Z')) ||
3188 ((c >= '0') && (c <= '9')) || /* !start */
3189 (c == '_') || (c == ':') ||
3190 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3191 ((c >= 0xC0) && (c <= 0xD6)) ||
3192 ((c >= 0xD8) && (c <= 0xF6)) ||
3193 ((c >= 0xF8) && (c <= 0x2FF)) ||
3194 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3195 ((c >= 0x370) && (c <= 0x37D)) ||
3196 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3197 ((c >= 0x200C) && (c <= 0x200D)) ||
3198 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3199 ((c >= 0x2070) && (c <= 0x218F)) ||
3200 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3201 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3202 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3203 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3204 ((c >= 0x10000) && (c <= 0xEFFFF))))
3205 return(1);
3206 } else {
3207 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3208 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003209 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003210 (IS_COMBINING(c)) ||
3211 (IS_EXTENDER(c)))
3212 return(1);
3213 }
3214 return(0);
3215}
3216
Daniel Veillarde57ec792003-09-10 10:50:59 +00003217static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003218 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003219
Daniel Veillard34e3f642008-07-29 09:02:27 +00003220static const xmlChar *
3221xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3222 int len = 0, l;
3223 int c;
3224 int count = 0;
3225
Daniel Veillardc6561462009-03-25 10:22:31 +00003226#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003227 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003228#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003229
3230 /*
3231 * Handler for more complex cases
3232 */
3233 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003234 if (ctxt->instate == XML_PARSER_EOF)
3235 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003236 c = CUR_CHAR(l);
3237 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3238 /*
3239 * Use the new checks of production [4] [4a] amd [5] of the
3240 * Update 5 of XML-1.0
3241 */
3242 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3243 (!(((c >= 'a') && (c <= 'z')) ||
3244 ((c >= 'A') && (c <= 'Z')) ||
3245 (c == '_') || (c == ':') ||
3246 ((c >= 0xC0) && (c <= 0xD6)) ||
3247 ((c >= 0xD8) && (c <= 0xF6)) ||
3248 ((c >= 0xF8) && (c <= 0x2FF)) ||
3249 ((c >= 0x370) && (c <= 0x37D)) ||
3250 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3251 ((c >= 0x200C) && (c <= 0x200D)) ||
3252 ((c >= 0x2070) && (c <= 0x218F)) ||
3253 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3254 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3255 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3256 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3257 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3258 return(NULL);
3259 }
3260 len += l;
3261 NEXTL(l);
3262 c = CUR_CHAR(l);
3263 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3264 (((c >= 'a') && (c <= 'z')) ||
3265 ((c >= 'A') && (c <= 'Z')) ||
3266 ((c >= '0') && (c <= '9')) || /* !start */
3267 (c == '_') || (c == ':') ||
3268 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3269 ((c >= 0xC0) && (c <= 0xD6)) ||
3270 ((c >= 0xD8) && (c <= 0xF6)) ||
3271 ((c >= 0xF8) && (c <= 0x2FF)) ||
3272 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3273 ((c >= 0x370) && (c <= 0x37D)) ||
3274 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3275 ((c >= 0x200C) && (c <= 0x200D)) ||
3276 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3277 ((c >= 0x2070) && (c <= 0x218F)) ||
3278 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3279 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3280 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3281 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3282 ((c >= 0x10000) && (c <= 0xEFFFF))
3283 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003284 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003285 count = 0;
3286 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003287 if (ctxt->instate == XML_PARSER_EOF)
3288 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003289 }
3290 len += l;
3291 NEXTL(l);
3292 c = CUR_CHAR(l);
3293 }
3294 } else {
3295 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296 (!IS_LETTER(c) && (c != '_') &&
3297 (c != ':'))) {
3298 return(NULL);
3299 }
3300 len += l;
3301 NEXTL(l);
3302 c = CUR_CHAR(l);
3303
3304 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003307 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003308 (IS_COMBINING(c)) ||
3309 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003310 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003311 count = 0;
3312 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003313 if (ctxt->instate == XML_PARSER_EOF)
3314 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003315 }
3316 len += l;
3317 NEXTL(l);
3318 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003319 if (c == 0) {
3320 count = 0;
3321 GROW;
3322 if (ctxt->instate == XML_PARSER_EOF)
3323 return(NULL);
3324 c = CUR_CHAR(l);
3325 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003326 }
3327 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003328 if ((len > XML_MAX_NAME_LENGTH) &&
3329 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3330 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3331 return(NULL);
3332 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003333 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3334 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3335 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3336}
3337
Owen Taylor3473f882001-02-23 17:55:21 +00003338/**
3339 * xmlParseName:
3340 * @ctxt: an XML parser context
3341 *
3342 * parse an XML name.
3343 *
3344 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3345 * CombiningChar | Extender
3346 *
3347 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3348 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003349 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003350 *
3351 * Returns the Name parsed or NULL
3352 */
3353
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003354const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003355xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003356 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003357 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003358 int count = 0;
3359
3360 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003361
Daniel Veillardc6561462009-03-25 10:22:31 +00003362#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003363 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003364#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003365
Daniel Veillard48b2f892001-02-25 16:11:03 +00003366 /*
3367 * Accelerator for simple ASCII names
3368 */
3369 in = ctxt->input->cur;
3370 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3371 ((*in >= 0x41) && (*in <= 0x5A)) ||
3372 (*in == '_') || (*in == ':')) {
3373 in++;
3374 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3375 ((*in >= 0x41) && (*in <= 0x5A)) ||
3376 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003377 (*in == '_') || (*in == '-') ||
3378 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003379 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003380 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003381 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003382 if ((count > XML_MAX_NAME_LENGTH) &&
3383 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3384 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3385 return(NULL);
3386 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003387 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003388 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003389 ctxt->nbChars += count;
3390 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003391 if (ret == NULL)
3392 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003393 return(ret);
3394 }
3395 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003396 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003397 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003398}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003399
Daniel Veillard34e3f642008-07-29 09:02:27 +00003400static const xmlChar *
3401xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402 int len = 0, l;
3403 int c;
3404 int count = 0;
3405
Daniel Veillardc6561462009-03-25 10:22:31 +00003406#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003407 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003408#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003409
3410 /*
3411 * Handler for more complex cases
3412 */
3413 GROW;
3414 c = CUR_CHAR(l);
3415 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3416 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3417 return(NULL);
3418 }
3419
3420 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3421 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003422 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003423 if ((len > XML_MAX_NAME_LENGTH) &&
3424 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3425 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3426 return(NULL);
3427 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003428 count = 0;
3429 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003430 if (ctxt->instate == XML_PARSER_EOF)
3431 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 }
3433 len += l;
3434 NEXTL(l);
3435 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003436 if (c == 0) {
3437 count = 0;
3438 GROW;
3439 if (ctxt->instate == XML_PARSER_EOF)
3440 return(NULL);
3441 c = CUR_CHAR(l);
3442 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003443 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003444 if ((len > XML_MAX_NAME_LENGTH) &&
3445 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3446 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3447 return(NULL);
3448 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003449 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3450}
3451
3452/**
3453 * xmlParseNCName:
3454 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003455 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003456 *
3457 * parse an XML name.
3458 *
3459 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3460 * CombiningChar | Extender
3461 *
3462 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3463 *
3464 * Returns the Name parsed or NULL
3465 */
3466
3467static const xmlChar *
3468xmlParseNCName(xmlParserCtxtPtr ctxt) {
3469 const xmlChar *in;
3470 const xmlChar *ret;
3471 int count = 0;
3472
Daniel Veillardc6561462009-03-25 10:22:31 +00003473#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003474 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003475#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003476
3477 /*
3478 * Accelerator for simple ASCII names
3479 */
3480 in = ctxt->input->cur;
3481 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3482 ((*in >= 0x41) && (*in <= 0x5A)) ||
3483 (*in == '_')) {
3484 in++;
3485 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3486 ((*in >= 0x41) && (*in <= 0x5A)) ||
3487 ((*in >= 0x30) && (*in <= 0x39)) ||
3488 (*in == '_') || (*in == '-') ||
3489 (*in == '.'))
3490 in++;
3491 if ((*in > 0) && (*in < 0x80)) {
3492 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003493 if ((count > XML_MAX_NAME_LENGTH) &&
3494 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3495 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3496 return(NULL);
3497 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003498 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3499 ctxt->input->cur = in;
3500 ctxt->nbChars += count;
3501 ctxt->input->col += count;
3502 if (ret == NULL) {
3503 xmlErrMemory(ctxt, NULL);
3504 }
3505 return(ret);
3506 }
3507 }
3508 return(xmlParseNCNameComplex(ctxt));
3509}
3510
Daniel Veillard46de64e2002-05-29 08:21:33 +00003511/**
3512 * xmlParseNameAndCompare:
3513 * @ctxt: an XML parser context
3514 *
3515 * parse an XML name and compares for match
3516 * (specialized for endtag parsing)
3517 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003518 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3519 * and the name for mismatch
3520 */
3521
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003522static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003523xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003524 register const xmlChar *cmp = other;
3525 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003526 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003527
3528 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003529 if (ctxt->instate == XML_PARSER_EOF)
3530 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003531
Daniel Veillard46de64e2002-05-29 08:21:33 +00003532 in = ctxt->input->cur;
3533 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003534 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003535 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003536 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003537 }
William M. Brack76e95df2003-10-18 16:20:14 +00003538 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003539 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003540 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003541 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003542 }
3543 /* failure (or end of input buffer), check with full function */
3544 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003545 /* strings coming from the dictionnary direct compare possible */
3546 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003547 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003548 }
3549 return ret;
3550}
3551
Owen Taylor3473f882001-02-23 17:55:21 +00003552/**
3553 * xmlParseStringName:
3554 * @ctxt: an XML parser context
3555 * @str: a pointer to the string pointer (IN/OUT)
3556 *
3557 * parse an XML name.
3558 *
3559 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3560 * CombiningChar | Extender
3561 *
3562 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3563 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003564 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003565 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003566 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003567 * is updated to the current location in the string.
3568 */
3569
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003570static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003571xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3572 xmlChar buf[XML_MAX_NAMELEN + 5];
3573 const xmlChar *cur = *str;
3574 int len = 0, l;
3575 int c;
3576
Daniel Veillardc6561462009-03-25 10:22:31 +00003577#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003578 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003579#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003580
Owen Taylor3473f882001-02-23 17:55:21 +00003581 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003582 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003583 return(NULL);
3584 }
3585
Daniel Veillard34e3f642008-07-29 09:02:27 +00003586 COPY_BUF(l,buf,len,c);
3587 cur += l;
3588 c = CUR_SCHAR(cur, l);
3589 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003590 COPY_BUF(l,buf,len,c);
3591 cur += l;
3592 c = CUR_SCHAR(cur, l);
3593 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3594 /*
3595 * Okay someone managed to make a huge name, so he's ready to pay
3596 * for the processing speed.
3597 */
3598 xmlChar *buffer;
3599 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003600
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003601 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003602 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003603 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003604 return(NULL);
3605 }
3606 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003607 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003608 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003609 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003610
3611 if ((len > XML_MAX_NAME_LENGTH) &&
3612 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3613 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614 xmlFree(buffer);
3615 return(NULL);
3616 }
Owen Taylor3473f882001-02-23 17:55:21 +00003617 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003618 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003619 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003620 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003621 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003622 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003623 return(NULL);
3624 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003625 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003626 }
3627 COPY_BUF(l,buffer,len,c);
3628 cur += l;
3629 c = CUR_SCHAR(cur, l);
3630 }
3631 buffer[len] = 0;
3632 *str = cur;
3633 return(buffer);
3634 }
3635 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003636 if ((len > XML_MAX_NAME_LENGTH) &&
3637 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3638 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3639 return(NULL);
3640 }
Owen Taylor3473f882001-02-23 17:55:21 +00003641 *str = cur;
3642 return(xmlStrndup(buf, len));
3643}
3644
3645/**
3646 * xmlParseNmtoken:
3647 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003648 *
Owen Taylor3473f882001-02-23 17:55:21 +00003649 * parse an XML Nmtoken.
3650 *
3651 * [7] Nmtoken ::= (NameChar)+
3652 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003653 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003654 *
3655 * Returns the Nmtoken parsed or NULL
3656 */
3657
3658xmlChar *
3659xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3660 xmlChar buf[XML_MAX_NAMELEN + 5];
3661 int len = 0, l;
3662 int c;
3663 int count = 0;
3664
Daniel Veillardc6561462009-03-25 10:22:31 +00003665#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003666 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003667#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003668
Owen Taylor3473f882001-02-23 17:55:21 +00003669 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003670 if (ctxt->instate == XML_PARSER_EOF)
3671 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003672 c = CUR_CHAR(l);
3673
Daniel Veillard34e3f642008-07-29 09:02:27 +00003674 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003675 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003676 count = 0;
3677 GROW;
3678 }
3679 COPY_BUF(l,buf,len,c);
3680 NEXTL(l);
3681 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003682 if (c == 0) {
3683 count = 0;
3684 GROW;
3685 if (ctxt->instate == XML_PARSER_EOF)
3686 return(NULL);
3687 c = CUR_CHAR(l);
3688 }
Owen Taylor3473f882001-02-23 17:55:21 +00003689 if (len >= XML_MAX_NAMELEN) {
3690 /*
3691 * Okay someone managed to make a huge token, so he's ready to pay
3692 * for the processing speed.
3693 */
3694 xmlChar *buffer;
3695 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003696
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003697 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003698 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003699 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003700 return(NULL);
3701 }
3702 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003703 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003704 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003705 count = 0;
3706 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003707 if (ctxt->instate == XML_PARSER_EOF) {
3708 xmlFree(buffer);
3709 return(NULL);
3710 }
Owen Taylor3473f882001-02-23 17:55:21 +00003711 }
3712 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003713 xmlChar *tmp;
3714
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003715 if ((max > XML_MAX_NAME_LENGTH) &&
3716 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3717 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3718 xmlFree(buffer);
3719 return(NULL);
3720 }
Owen Taylor3473f882001-02-23 17:55:21 +00003721 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003722 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003723 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003724 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003725 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003726 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003727 return(NULL);
3728 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003729 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003730 }
3731 COPY_BUF(l,buffer,len,c);
3732 NEXTL(l);
3733 c = CUR_CHAR(l);
3734 }
3735 buffer[len] = 0;
3736 return(buffer);
3737 }
3738 }
3739 if (len == 0)
3740 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003741 if ((len > XML_MAX_NAME_LENGTH) &&
3742 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3743 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3744 return(NULL);
3745 }
Owen Taylor3473f882001-02-23 17:55:21 +00003746 return(xmlStrndup(buf, len));
3747}
3748
3749/**
3750 * xmlParseEntityValue:
3751 * @ctxt: an XML parser context
3752 * @orig: if non-NULL store a copy of the original entity value
3753 *
3754 * parse a value for ENTITY declarations
3755 *
3756 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3757 * "'" ([^%&'] | PEReference | Reference)* "'"
3758 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003759 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003760 */
3761
3762xmlChar *
3763xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3764 xmlChar *buf = NULL;
3765 int len = 0;
3766 int size = XML_PARSER_BUFFER_SIZE;
3767 int c, l;
3768 xmlChar stop;
3769 xmlChar *ret = NULL;
3770 const xmlChar *cur = NULL;
3771 xmlParserInputPtr input;
3772
3773 if (RAW == '"') stop = '"';
3774 else if (RAW == '\'') stop = '\'';
3775 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003776 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003777 return(NULL);
3778 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003779 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003780 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003781 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 return(NULL);
3783 }
3784
3785 /*
3786 * The content of the entity definition is copied in a buffer.
3787 */
3788
3789 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3790 input = ctxt->input;
3791 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003792 if (ctxt->instate == XML_PARSER_EOF) {
3793 xmlFree(buf);
3794 return(NULL);
3795 }
Owen Taylor3473f882001-02-23 17:55:21 +00003796 NEXT;
3797 c = CUR_CHAR(l);
3798 /*
3799 * NOTE: 4.4.5 Included in Literal
3800 * When a parameter entity reference appears in a literal entity
3801 * value, ... a single or double quote character in the replacement
3802 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003803 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003804 * In practice it means we stop the loop only when back at parsing
3805 * the initial entity and the quote is found
3806 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003807 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3808 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003809 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003810 xmlChar *tmp;
3811
Owen Taylor3473f882001-02-23 17:55:21 +00003812 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003813 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3814 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003815 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003816 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003817 return(NULL);
3818 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003819 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003820 }
3821 COPY_BUF(l,buf,len,c);
3822 NEXTL(l);
3823 /*
3824 * Pop-up of finished entities.
3825 */
3826 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3827 xmlPopInput(ctxt);
3828
3829 GROW;
3830 c = CUR_CHAR(l);
3831 if (c == 0) {
3832 GROW;
3833 c = CUR_CHAR(l);
3834 }
3835 }
3836 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003837 if (ctxt->instate == XML_PARSER_EOF) {
3838 xmlFree(buf);
3839 return(NULL);
3840 }
Owen Taylor3473f882001-02-23 17:55:21 +00003841
3842 /*
3843 * Raise problem w.r.t. '&' and '%' being used in non-entities
3844 * reference constructs. Note Charref will be handled in
3845 * xmlStringDecodeEntities()
3846 */
3847 cur = buf;
3848 while (*cur != 0) { /* non input consuming */
3849 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3850 xmlChar *name;
3851 xmlChar tmp = *cur;
3852
3853 cur++;
3854 name = xmlParseStringName(ctxt, &cur);
3855 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003856 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003857 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003858 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003859 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003860 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3861 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003862 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003863 }
3864 if (name != NULL)
3865 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003866 if (*cur == 0)
3867 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
3869 cur++;
3870 }
3871
3872 /*
3873 * Then PEReference entities are substituted.
3874 */
3875 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003876 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003877 xmlFree(buf);
3878 } else {
3879 NEXT;
3880 /*
3881 * NOTE: 4.4.7 Bypassed
3882 * When a general entity reference appears in the EntityValue in
3883 * an entity declaration, it is bypassed and left as is.
3884 * so XML_SUBSTITUTE_REF is not set here.
3885 */
3886 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3887 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003888 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003889 *orig = buf;
3890 else
3891 xmlFree(buf);
3892 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003893
Owen Taylor3473f882001-02-23 17:55:21 +00003894 return(ret);
3895}
3896
3897/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003898 * xmlParseAttValueComplex:
3899 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003900 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003901 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003902 *
3903 * parse a value for an attribute, this is the fallback function
3904 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003905 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003906 *
3907 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3908 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003909static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003910xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003911 xmlChar limit = 0;
3912 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003913 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003914 size_t len = 0;
3915 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003916 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003917 xmlChar *current = NULL;
3918 xmlEntityPtr ent;
3919
Owen Taylor3473f882001-02-23 17:55:21 +00003920 if (NXT(0) == '"') {
3921 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3922 limit = '"';
3923 NEXT;
3924 } else if (NXT(0) == '\'') {
3925 limit = '\'';
3926 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3927 NEXT;
3928 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003929 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 return(NULL);
3931 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003932
Owen Taylor3473f882001-02-23 17:55:21 +00003933 /*
3934 * allocate a translation buffer.
3935 */
3936 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003937 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003938 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003939
3940 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003941 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003942 */
3943 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003944 while (((NXT(0) != limit) && /* checked */
3945 (IS_CHAR(c)) && (c != '<')) &&
3946 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003947 /*
3948 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3949 * special option is given
3950 */
3951 if ((len > XML_MAX_TEXT_LENGTH) &&
3952 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3953 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003954 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003955 goto mem_error;
3956 }
Owen Taylor3473f882001-02-23 17:55:21 +00003957 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003958 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003959 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003960 if (NXT(1) == '#') {
3961 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003962
Owen Taylor3473f882001-02-23 17:55:21 +00003963 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003964 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003965 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003966 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003967 }
3968 buf[len++] = '&';
3969 } else {
3970 /*
3971 * The reparsing will be done in xmlStringGetNodeList()
3972 * called by the attribute() function in SAX.c
3973 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003974 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003975 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003976 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003977 buf[len++] = '&';
3978 buf[len++] = '#';
3979 buf[len++] = '3';
3980 buf[len++] = '8';
3981 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003982 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003983 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003984 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003985 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003986 }
Owen Taylor3473f882001-02-23 17:55:21 +00003987 len += xmlCopyChar(0, &buf[len], val);
3988 }
3989 } else {
3990 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003991 ctxt->nbentities++;
3992 if (ent != NULL)
3993 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003994 if ((ent != NULL) &&
3995 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003996 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003997 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003998 }
3999 if ((ctxt->replaceEntities == 0) &&
4000 (ent->content[0] == '&')) {
4001 buf[len++] = '&';
4002 buf[len++] = '#';
4003 buf[len++] = '3';
4004 buf[len++] = '8';
4005 buf[len++] = ';';
4006 } else {
4007 buf[len++] = ent->content[0];
4008 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004009 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004010 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004011 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4012 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004013 XML_SUBSTITUTE_REF,
4014 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004015 if (rep != NULL) {
4016 current = rep;
4017 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004018 if ((*current == 0xD) || (*current == 0xA) ||
4019 (*current == 0x9)) {
4020 buf[len++] = 0x20;
4021 current++;
4022 } else
4023 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004024 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004025 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004026 }
4027 }
4028 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004029 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004030 }
4031 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004032 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004033 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004034 }
Owen Taylor3473f882001-02-23 17:55:21 +00004035 if (ent->content != NULL)
4036 buf[len++] = ent->content[0];
4037 }
4038 } else if (ent != NULL) {
4039 int i = xmlStrlen(ent->name);
4040 const xmlChar *cur = ent->name;
4041
4042 /*
4043 * This may look absurd but is needed to detect
4044 * entities problems
4045 */
4046 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4047 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004048 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004049 XML_SUBSTITUTE_REF, 0, 0, 0);
4050 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00004051 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004052 rep = NULL;
4053 }
Owen Taylor3473f882001-02-23 17:55:21 +00004054 }
4055
4056 /*
4057 * Just output the reference
4058 */
4059 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004060 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004061 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004062 }
4063 for (;i > 0;i--)
4064 buf[len++] = *cur++;
4065 buf[len++] = ';';
4066 }
4067 }
4068 } else {
4069 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004070 if ((len != 0) || (!normalize)) {
4071 if ((!normalize) || (!in_space)) {
4072 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004073 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004074 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004075 }
4076 }
4077 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004078 }
4079 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004080 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004081 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004082 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004083 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004084 }
4085 }
4086 NEXTL(l);
4087 }
4088 GROW;
4089 c = CUR_CHAR(l);
4090 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004091 if (ctxt->instate == XML_PARSER_EOF)
4092 goto error;
4093
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004094 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004095 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004096 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004097 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004098 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004099 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004100 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004101 if ((c != 0) && (!IS_CHAR(c))) {
4102 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4103 "invalid character in attribute value\n");
4104 } else {
4105 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4106 "AttValue: ' expected\n");
4107 }
Owen Taylor3473f882001-02-23 17:55:21 +00004108 } else
4109 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004110
4111 /*
4112 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004113 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004114 */
4115 if (len >= INT_MAX) {
4116 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004117 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004118 goto mem_error;
4119 }
4120
4121 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004122 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004123
4124mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004126error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004127 if (buf != NULL)
4128 xmlFree(buf);
4129 if (rep != NULL)
4130 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004131 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004132}
4133
4134/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004135 * xmlParseAttValue:
4136 * @ctxt: an XML parser context
4137 *
4138 * parse a value for an attribute
4139 * Note: the parser won't do substitution of entities here, this
4140 * will be handled later in xmlStringGetNodeList
4141 *
4142 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4143 * "'" ([^<&'] | Reference)* "'"
4144 *
4145 * 3.3.3 Attribute-Value Normalization:
4146 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004147 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004148 * - a character reference is processed by appending the referenced
4149 * character to the attribute value
4150 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004151 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004152 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4153 * appending #x20 to the normalized value, except that only a single
4154 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004155 * parsed entity or the literal entity value of an internal parsed entity
4156 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004157 * If the declared value is not CDATA, then the XML processor must further
4158 * process the normalized attribute value by discarding any leading and
4159 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004160 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004161 * All attributes for which no declaration has been read should be treated
4162 * by a non-validating parser as if declared CDATA.
4163 *
4164 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4165 */
4166
4167
4168xmlChar *
4169xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004170 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004171 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004172}
4173
4174/**
Owen Taylor3473f882001-02-23 17:55:21 +00004175 * xmlParseSystemLiteral:
4176 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004177 *
Owen Taylor3473f882001-02-23 17:55:21 +00004178 * parse an XML Literal
4179 *
4180 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4181 *
4182 * Returns the SystemLiteral parsed or NULL
4183 */
4184
4185xmlChar *
4186xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4187 xmlChar *buf = NULL;
4188 int len = 0;
4189 int size = XML_PARSER_BUFFER_SIZE;
4190 int cur, l;
4191 xmlChar stop;
4192 int state = ctxt->instate;
4193 int count = 0;
4194
4195 SHRINK;
4196 if (RAW == '"') {
4197 NEXT;
4198 stop = '"';
4199 } else if (RAW == '\'') {
4200 NEXT;
4201 stop = '\'';
4202 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004203 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004204 return(NULL);
4205 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004206
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004207 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004208 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004209 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004210 return(NULL);
4211 }
4212 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4213 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004214 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004215 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004216 xmlChar *tmp;
4217
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004218 if ((size > XML_MAX_NAME_LENGTH) &&
4219 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4220 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4221 xmlFree(buf);
4222 ctxt->instate = (xmlParserInputState) state;
4223 return(NULL);
4224 }
Owen Taylor3473f882001-02-23 17:55:21 +00004225 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004226 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4227 if (tmp == NULL) {
4228 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004229 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004230 ctxt->instate = (xmlParserInputState) state;
4231 return(NULL);
4232 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004233 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004234 }
4235 count++;
4236 if (count > 50) {
4237 GROW;
4238 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004239 if (ctxt->instate == XML_PARSER_EOF) {
4240 xmlFree(buf);
4241 return(NULL);
4242 }
Owen Taylor3473f882001-02-23 17:55:21 +00004243 }
4244 COPY_BUF(l,buf,len,cur);
4245 NEXTL(l);
4246 cur = CUR_CHAR(l);
4247 if (cur == 0) {
4248 GROW;
4249 SHRINK;
4250 cur = CUR_CHAR(l);
4251 }
4252 }
4253 buf[len] = 0;
4254 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004255 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004256 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004257 } else {
4258 NEXT;
4259 }
4260 return(buf);
4261}
4262
4263/**
4264 * xmlParsePubidLiteral:
4265 * @ctxt: an XML parser context
4266 *
4267 * parse an XML public literal
4268 *
4269 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4270 *
4271 * Returns the PubidLiteral parsed or NULL.
4272 */
4273
4274xmlChar *
4275xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4276 xmlChar *buf = NULL;
4277 int len = 0;
4278 int size = XML_PARSER_BUFFER_SIZE;
4279 xmlChar cur;
4280 xmlChar stop;
4281 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004282 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004283
4284 SHRINK;
4285 if (RAW == '"') {
4286 NEXT;
4287 stop = '"';
4288 } else if (RAW == '\'') {
4289 NEXT;
4290 stop = '\'';
4291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004292 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004293 return(NULL);
4294 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004295 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004296 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004297 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004298 return(NULL);
4299 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004300 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004301 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004302 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004303 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004304 xmlChar *tmp;
4305
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004306 if ((size > XML_MAX_NAME_LENGTH) &&
4307 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4308 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4309 xmlFree(buf);
4310 return(NULL);
4311 }
Owen Taylor3473f882001-02-23 17:55:21 +00004312 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004313 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4314 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004315 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004316 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004317 return(NULL);
4318 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004319 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004320 }
4321 buf[len++] = cur;
4322 count++;
4323 if (count > 50) {
4324 GROW;
4325 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004326 if (ctxt->instate == XML_PARSER_EOF) {
4327 xmlFree(buf);
4328 return(NULL);
4329 }
Owen Taylor3473f882001-02-23 17:55:21 +00004330 }
4331 NEXT;
4332 cur = CUR;
4333 if (cur == 0) {
4334 GROW;
4335 SHRINK;
4336 cur = CUR;
4337 }
4338 }
4339 buf[len] = 0;
4340 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004341 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004342 } else {
4343 NEXT;
4344 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004345 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004346 return(buf);
4347}
4348
Daniel Veillard8ed10722009-08-20 19:17:36 +02004349static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004350
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. Entry i holds i itself for TAB (0x09) and for 0x20..0x7F
 * except '&', '<' and ']'; every other entry, including LF, CR and all
 * non-ASCII bytes, is 0. Each row below covers 16 consecutive bytes.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only TAB is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) is excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) is excluded */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) is excluded */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 .. 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4388
Owen Taylor3473f882001-02-23 17:55:21 +00004389/**
4390 * xmlParseCharData:
4391 * @ctxt: an XML parser context
4392 * @cdata: int indicating whether we are within a CDATA section
4393 *
4394 * parse a CharData section.
4395 * if we are within a CDATA section ']]>' marks an end of section.
4396 *
4397 * The right angle bracket (>) may be represented using the string "&gt;",
4398 * and must, for compatibility, be escaped using "&gt;" or a character
4399 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004400 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004401 *
4402 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4403 */
4404
4405void
4406xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004407 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004408 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004409 int line = ctxt->input->line;
4410 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004411 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004412
4413 SHRINK;
4414 GROW;
4415 /*
4416 * Accelerated common case where input don't need to be
4417 * modified before passing it to the handler.
4418 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004419 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004420 in = ctxt->input->cur;
4421 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004422get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004423 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004424 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004425 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004426 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004427 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004428 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004429 goto get_more_space;
4430 }
4431 if (*in == '<') {
4432 nbchar = in - ctxt->input->cur;
4433 if (nbchar > 0) {
4434 const xmlChar *tmp = ctxt->input->cur;
4435 ctxt->input->cur = in;
4436
Daniel Veillard34099b42004-11-04 17:34:35 +00004437 if ((ctxt->sax != NULL) &&
4438 (ctxt->sax->ignorableWhitespace !=
4439 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004440 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004441 if (ctxt->sax->ignorableWhitespace != NULL)
4442 ctxt->sax->ignorableWhitespace(ctxt->userData,
4443 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004444 } else {
4445 if (ctxt->sax->characters != NULL)
4446 ctxt->sax->characters(ctxt->userData,
4447 tmp, nbchar);
4448 if (*ctxt->space == -1)
4449 *ctxt->space = -2;
4450 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004451 } else if ((ctxt->sax != NULL) &&
4452 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004453 ctxt->sax->characters(ctxt->userData,
4454 tmp, nbchar);
4455 }
4456 }
4457 return;
4458 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004459
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004460get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004461 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004462 while (test_char_data[*in]) {
4463 in++;
4464 ccol++;
4465 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004466 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004467 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004468 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004469 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004470 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004471 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004472 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004473 }
4474 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004475 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004476 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004477 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004478 return;
4479 }
4480 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004481 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004482 goto get_more;
4483 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004484 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004485 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004486 if ((ctxt->sax != NULL) &&
4487 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004488 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004489 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004490 const xmlChar *tmp = ctxt->input->cur;
4491 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004492
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004493 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004494 if (ctxt->sax->ignorableWhitespace != NULL)
4495 ctxt->sax->ignorableWhitespace(ctxt->userData,
4496 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004497 } else {
4498 if (ctxt->sax->characters != NULL)
4499 ctxt->sax->characters(ctxt->userData,
4500 tmp, nbchar);
4501 if (*ctxt->space == -1)
4502 *ctxt->space = -2;
4503 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004504 line = ctxt->input->line;
4505 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004506 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004507 if (ctxt->sax->characters != NULL)
4508 ctxt->sax->characters(ctxt->userData,
4509 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004510 line = ctxt->input->line;
4511 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004512 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004513 /* something really bad happened in the SAX callback */
4514 if (ctxt->instate != XML_PARSER_CONTENT)
4515 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004516 }
4517 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004518 if (*in == 0xD) {
4519 in++;
4520 if (*in == 0xA) {
4521 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004522 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004523 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004524 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004525 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004526 in--;
4527 }
4528 if (*in == '<') {
4529 return;
4530 }
4531 if (*in == '&') {
4532 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004533 }
4534 SHRINK;
4535 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004536 if (ctxt->instate == XML_PARSER_EOF)
4537 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004538 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004539 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004540 nbchar = 0;
4541 }
Daniel Veillard50582112001-03-26 22:52:16 +00004542 ctxt->input->line = line;
4543 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004544 xmlParseCharDataComplex(ctxt, cdata);
4545}
4546
Daniel Veillard01c13b52002-12-10 15:19:08 +00004547/**
4548 * xmlParseCharDataComplex:
4549 * @ctxt: an XML parser context
4550 * @cdata: int indicating whether we are within a CDATA section
4551 *
4552 * parse a CharData section.this is the fallback function
4553 * of xmlParseCharData() when the parsing requires handling
4554 * of non-ASCII characters.
4555 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004556static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004557xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004558 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4559 int nbchar = 0;
4560 int cur, l;
4561 int count = 0;
4562
4563 SHRINK;
4564 GROW;
4565 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004566 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004567 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004568 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004569 if ((cur == ']') && (NXT(1) == ']') &&
4570 (NXT(2) == '>')) {
4571 if (cdata) break;
4572 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004573 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004574 }
4575 }
4576 COPY_BUF(l,buf,nbchar,cur);
4577 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004578 buf[nbchar] = 0;
4579
Owen Taylor3473f882001-02-23 17:55:21 +00004580 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004581 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004582 */
4583 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004584 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004585 if (ctxt->sax->ignorableWhitespace != NULL)
4586 ctxt->sax->ignorableWhitespace(ctxt->userData,
4587 buf, nbchar);
4588 } else {
4589 if (ctxt->sax->characters != NULL)
4590 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004591 if ((ctxt->sax->characters !=
4592 ctxt->sax->ignorableWhitespace) &&
4593 (*ctxt->space == -1))
4594 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004595 }
4596 }
4597 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004598 /* something really bad happened in the SAX callback */
4599 if (ctxt->instate != XML_PARSER_CONTENT)
4600 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004601 }
4602 count++;
4603 if (count > 50) {
4604 GROW;
4605 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004606 if (ctxt->instate == XML_PARSER_EOF)
4607 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004608 }
4609 NEXTL(l);
4610 cur = CUR_CHAR(l);
4611 }
4612 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004613 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004614 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004615 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004616 */
4617 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004618 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004619 if (ctxt->sax->ignorableWhitespace != NULL)
4620 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4621 } else {
4622 if (ctxt->sax->characters != NULL)
4623 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004624 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4625 (*ctxt->space == -1))
4626 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004627 }
4628 }
4629 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004630 if ((cur != 0) && (!IS_CHAR(cur))) {
4631 /* Generate the error and skip the offending character */
4632 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4633 "PCDATA invalid Char value %d\n",
4634 cur);
4635 NEXTL(l);
4636 }
Owen Taylor3473f882001-02-23 17:55:21 +00004637}
4638
4639/**
4640 * xmlParseExternalID:
4641 * @ctxt: an XML parser context
4642 * @publicID: a xmlChar** receiving PubidLiteral
4643 * @strict: indicate whether we should restrict parsing to only
4644 * production [75], see NOTE below
4645 *
4646 * Parse an External ID or a Public ID
4647 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004648 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004649 * 'PUBLIC' S PubidLiteral S SystemLiteral
4650 *
4651 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4652 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4653 *
4654 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4655 *
4656 * Returns the function returns SystemLiteral and in the second
4657 * case publicID receives PubidLiteral, is strict is off
4658 * it is possible to return NULL and have publicID set.
4659 */
4660
4661xmlChar *
4662xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4663 xmlChar *URI = NULL;
4664
4665 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004666
4667 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004668 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004669 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004670 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004671 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4672 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004673 }
4674 SKIP_BLANKS;
4675 URI = xmlParseSystemLiteral(ctxt);
4676 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004677 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004679 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004680 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004681 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004682 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004683 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004684 }
4685 SKIP_BLANKS;
4686 *publicID = xmlParsePubidLiteral(ctxt);
4687 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004688 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004689 }
4690 if (strict) {
4691 /*
4692 * We don't handle [83] so "S SystemLiteral" is required.
4693 */
William M. Brack76e95df2003-10-18 16:20:14 +00004694 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004695 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004696 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004697 }
4698 } else {
4699 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004700 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004701 * "S SystemLiteral" is not detected. From a purely parsing
4702 * point of view that's a nice mess.
4703 */
4704 const xmlChar *ptr;
4705 GROW;
4706
4707 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004708 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004709
William M. Brack76e95df2003-10-18 16:20:14 +00004710 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004711 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4712 }
4713 SKIP_BLANKS;
4714 URI = xmlParseSystemLiteral(ctxt);
4715 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004716 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004717 }
4718 }
4719 return(URI);
4720}
4721
4722/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004723 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004724 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004725 * @buf: the already parsed part of the buffer
4726 * @len: number of bytes filles in the buffer
4727 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004728 *
4729 * Skip an XML (SGML) comment <!-- .... -->
4730 * The spec says that "For compatibility, the string "--" (double-hyphen)
4731 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004732 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004733 *
4734 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4735 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004736static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004737xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4738 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004739 int q, ql;
4740 int r, rl;
4741 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004742 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004743 int inputid;
4744
4745 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004746
Owen Taylor3473f882001-02-23 17:55:21 +00004747 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004748 len = 0;
4749 size = XML_PARSER_BUFFER_SIZE;
4750 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4751 if (buf == NULL) {
4752 xmlErrMemory(ctxt, NULL);
4753 return;
4754 }
Owen Taylor3473f882001-02-23 17:55:21 +00004755 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004756 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004757 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004758 if (q == 0)
4759 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004760 if (!IS_CHAR(q)) {
4761 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4762 "xmlParseComment: invalid xmlChar value %d\n",
4763 q);
4764 xmlFree (buf);
4765 return;
4766 }
Owen Taylor3473f882001-02-23 17:55:21 +00004767 NEXTL(ql);
4768 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004769 if (r == 0)
4770 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004771 if (!IS_CHAR(r)) {
4772 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4773 "xmlParseComment: invalid xmlChar value %d\n",
4774 q);
4775 xmlFree (buf);
4776 return;
4777 }
Owen Taylor3473f882001-02-23 17:55:21 +00004778 NEXTL(rl);
4779 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004780 if (cur == 0)
4781 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004782 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004783 ((cur != '>') ||
4784 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004785 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004786 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004787 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004788 if ((len > XML_MAX_TEXT_LENGTH) &&
4789 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4790 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4791 "Comment too big found", NULL);
4792 xmlFree (buf);
4793 return;
4794 }
Owen Taylor3473f882001-02-23 17:55:21 +00004795 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004796 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004797 size_t new_size;
4798
4799 new_size = size * 2;
4800 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004801 if (new_buf == NULL) {
4802 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004803 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004804 return;
4805 }
William M. Bracka3215c72004-07-31 16:24:01 +00004806 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004807 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004808 }
4809 COPY_BUF(ql,buf,len,q);
4810 q = r;
4811 ql = rl;
4812 r = cur;
4813 rl = l;
4814
4815 count++;
4816 if (count > 50) {
4817 GROW;
4818 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004819 if (ctxt->instate == XML_PARSER_EOF) {
4820 xmlFree(buf);
4821 return;
4822 }
Owen Taylor3473f882001-02-23 17:55:21 +00004823 }
4824 NEXTL(l);
4825 cur = CUR_CHAR(l);
4826 if (cur == 0) {
4827 SHRINK;
4828 GROW;
4829 cur = CUR_CHAR(l);
4830 }
4831 }
4832 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004833 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004834 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004835 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004836 } else if (!IS_CHAR(cur)) {
4837 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4838 "xmlParseComment: invalid xmlChar value %d\n",
4839 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004840 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004841 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004842 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4843 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004844 }
4845 NEXT;
4846 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4847 (!ctxt->disableSAX))
4848 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004849 }
Daniel Veillardda629342007-08-01 07:49:06 +00004850 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004851 return;
4852not_terminated:
4853 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4854 "Comment not terminated\n", NULL);
4855 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004856 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004857}
Daniel Veillardda629342007-08-01 07:49:06 +00004858
Daniel Veillard4c778d82005-01-23 17:37:44 +00004859/**
4860 * xmlParseComment:
4861 * @ctxt: an XML parser context
4862 *
4863 * Skip an XML (SGML) comment <!-- .... -->
4864 * The spec says that "For compatibility, the string "--" (double-hyphen)
4865 * must not occur within comments. "
4866 *
4867 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4868 */
4869void
4870xmlParseComment(xmlParserCtxtPtr ctxt) {
4871 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004872 size_t size = XML_PARSER_BUFFER_SIZE;
4873 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004874 xmlParserInputState state;
4875 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004876 size_t nbchar = 0;
4877 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004878 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004879
4880 /*
4881 * Check that there is a comment right here.
4882 */
4883 if ((RAW != '<') || (NXT(1) != '!') ||
4884 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004885 state = ctxt->instate;
4886 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004887 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004888 SKIP(4);
4889 SHRINK;
4890 GROW;
4891
4892 /*
4893 * Accelerated common case where input don't need to be
4894 * modified before passing it to the handler.
4895 */
4896 in = ctxt->input->cur;
4897 do {
4898 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004899 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004900 ctxt->input->line++; ctxt->input->col = 1;
4901 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004902 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004903 }
4904get_more:
4905 ccol = ctxt->input->col;
4906 while (((*in > '-') && (*in <= 0x7F)) ||
4907 ((*in >= 0x20) && (*in < '-')) ||
4908 (*in == 0x09)) {
4909 in++;
4910 ccol++;
4911 }
4912 ctxt->input->col = ccol;
4913 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004914 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004915 ctxt->input->line++; ctxt->input->col = 1;
4916 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004917 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004918 goto get_more;
4919 }
4920 nbchar = in - ctxt->input->cur;
4921 /*
4922 * save current set of data
4923 */
4924 if (nbchar > 0) {
4925 if ((ctxt->sax != NULL) &&
4926 (ctxt->sax->comment != NULL)) {
4927 if (buf == NULL) {
4928 if ((*in == '-') && (in[1] == '-'))
4929 size = nbchar + 1;
4930 else
4931 size = XML_PARSER_BUFFER_SIZE + nbchar;
4932 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4933 if (buf == NULL) {
4934 xmlErrMemory(ctxt, NULL);
4935 ctxt->instate = state;
4936 return;
4937 }
4938 len = 0;
4939 } else if (len + nbchar + 1 >= size) {
4940 xmlChar *new_buf;
4941 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4942 new_buf = (xmlChar *) xmlRealloc(buf,
4943 size * sizeof(xmlChar));
4944 if (new_buf == NULL) {
4945 xmlFree (buf);
4946 xmlErrMemory(ctxt, NULL);
4947 ctxt->instate = state;
4948 return;
4949 }
4950 buf = new_buf;
4951 }
4952 memcpy(&buf[len], ctxt->input->cur, nbchar);
4953 len += nbchar;
4954 buf[len] = 0;
4955 }
4956 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004957 if ((len > XML_MAX_TEXT_LENGTH) &&
4958 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4959 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4960 "Comment too big found", NULL);
4961 xmlFree (buf);
4962 return;
4963 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004964 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004965 if (*in == 0xA) {
4966 in++;
4967 ctxt->input->line++; ctxt->input->col = 1;
4968 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004969 if (*in == 0xD) {
4970 in++;
4971 if (*in == 0xA) {
4972 ctxt->input->cur = in;
4973 in++;
4974 ctxt->input->line++; ctxt->input->col = 1;
4975 continue; /* while */
4976 }
4977 in--;
4978 }
4979 SHRINK;
4980 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004981 if (ctxt->instate == XML_PARSER_EOF) {
4982 xmlFree(buf);
4983 return;
4984 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004985 in = ctxt->input->cur;
4986 if (*in == '-') {
4987 if (in[1] == '-') {
4988 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004989 if (ctxt->input->id != inputid) {
4990 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4991 "comment doesn't start and stop in the same entity\n");
4992 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004993 SKIP(3);
4994 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4995 (!ctxt->disableSAX)) {
4996 if (buf != NULL)
4997 ctxt->sax->comment(ctxt->userData, buf);
4998 else
4999 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5000 }
5001 if (buf != NULL)
5002 xmlFree(buf);
5003 ctxt->instate = state;
5004 return;
5005 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005006 if (buf != NULL) {
5007 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5008 "Double hyphen within comment: "
5009 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005010 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005011 } else
5012 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5013 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005014 in++;
5015 ctxt->input->col++;
5016 }
5017 in++;
5018 ctxt->input->col++;
5019 goto get_more;
5020 }
5021 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5022 xmlParseCommentComplex(ctxt, buf, len, size);
5023 ctxt->instate = state;
5024 return;
5025}
5026
Owen Taylor3473f882001-02-23 17:55:21 +00005027
5028/**
5029 * xmlParsePITarget:
5030 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005031 *
Owen Taylor3473f882001-02-23 17:55:21 +00005032 * parse the name of a PI
5033 *
5034 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5035 *
5036 * Returns the PITarget name or NULL
5037 */
5038
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005039const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005040xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005041 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005042
5043 name = xmlParseName(ctxt);
5044 if ((name != NULL) &&
5045 ((name[0] == 'x') || (name[0] == 'X')) &&
5046 ((name[1] == 'm') || (name[1] == 'M')) &&
5047 ((name[2] == 'l') || (name[2] == 'L'))) {
5048 int i;
5049 if ((name[0] == 'x') && (name[1] == 'm') &&
5050 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005051 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005052 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005053 return(name);
5054 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005055 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005056 return(name);
5057 }
5058 for (i = 0;;i++) {
5059 if (xmlW3CPIs[i] == NULL) break;
5060 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5061 return(name);
5062 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005063 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5064 "xmlParsePITarget: invalid name prefix 'xml'\n",
5065 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005066 }
Daniel Veillard37334572008-07-31 08:20:02 +00005067 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005068 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005069 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5070 }
Owen Taylor3473f882001-02-23 17:55:21 +00005071 return(name);
5072}
5073
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form: catalog = "URL" or catalog = 'URL',
 * with optional blanks around each part and nothing but blanks after
 * the closing quote. A syntax error raises an XML_WAR_CATALOG_PI
 * warning, except that a missing '=' is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* The opening quote decides which character closes the value. */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5135
Owen Taylor3473f882001-02-23 17:55:21 +00005136/**
5137 * xmlParsePI:
5138 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005139 *
Owen Taylor3473f882001-02-23 17:55:21 +00005140 * parse an XML Processing Instruction.
5141 *
5142 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5143 *
5144 * The processing is transfered to SAX once parsed.
5145 */
5146
5147void
5148xmlParsePI(xmlParserCtxtPtr ctxt) {
5149 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005150 size_t len = 0;
5151 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005152 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005153 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005154 xmlParserInputState state;
5155 int count = 0;
5156
5157 if ((RAW == '<') && (NXT(1) == '?')) {
5158 xmlParserInputPtr input = ctxt->input;
5159 state = ctxt->instate;
5160 ctxt->instate = XML_PARSER_PI;
5161 /*
5162 * this is a Processing Instruction.
5163 */
5164 SKIP(2);
5165 SHRINK;
5166
5167 /*
5168 * Parse the target name and check for special support like
5169 * namespace.
5170 */
5171 target = xmlParsePITarget(ctxt);
5172 if (target != NULL) {
5173 if ((RAW == '?') && (NXT(1) == '>')) {
5174 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005175 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5176 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005177 }
5178 SKIP(2);
5179
5180 /*
5181 * SAX: PI detected.
5182 */
5183 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5184 (ctxt->sax->processingInstruction != NULL))
5185 ctxt->sax->processingInstruction(ctxt->userData,
5186 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005187 if (ctxt->instate != XML_PARSER_EOF)
5188 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005189 return;
5190 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005191 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005192 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005193 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005194 ctxt->instate = state;
5195 return;
5196 }
5197 cur = CUR;
5198 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005199 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5200 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005201 }
5202 SKIP_BLANKS;
5203 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005204 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005205 ((cur != '?') || (NXT(1) != '>'))) {
5206 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005207 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005208 size_t new_size = size * 2;
5209 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005210 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005211 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005212 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005213 ctxt->instate = state;
5214 return;
5215 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005216 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005217 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005218 }
5219 count++;
5220 if (count > 50) {
5221 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005222 if (ctxt->instate == XML_PARSER_EOF) {
5223 xmlFree(buf);
5224 return;
5225 }
Owen Taylor3473f882001-02-23 17:55:21 +00005226 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005227 if ((len > XML_MAX_TEXT_LENGTH) &&
5228 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5229 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5230 "PI %s too big found", target);
5231 xmlFree(buf);
5232 ctxt->instate = state;
5233 return;
5234 }
Owen Taylor3473f882001-02-23 17:55:21 +00005235 }
5236 COPY_BUF(l,buf,len,cur);
5237 NEXTL(l);
5238 cur = CUR_CHAR(l);
5239 if (cur == 0) {
5240 SHRINK;
5241 GROW;
5242 cur = CUR_CHAR(l);
5243 }
5244 }
Daniel Veillard51304812012-07-19 20:34:26 +08005245 if ((len > XML_MAX_TEXT_LENGTH) &&
5246 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5247 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5248 "PI %s too big found", target);
5249 xmlFree(buf);
5250 ctxt->instate = state;
5251 return;
5252 }
Owen Taylor3473f882001-02-23 17:55:21 +00005253 buf[len] = 0;
5254 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005255 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5256 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005257 } else {
5258 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005259 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5260 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005261 }
5262 SKIP(2);
5263
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005264#ifdef LIBXML_CATALOG_ENABLED
5265 if (((state == XML_PARSER_MISC) ||
5266 (state == XML_PARSER_START)) &&
5267 (xmlStrEqual(target, XML_CATALOG_PI))) {
5268 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5269 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5270 (allow == XML_CATA_ALLOW_ALL))
5271 xmlParseCatalogPI(ctxt, buf);
5272 }
5273#endif
5274
5275
Owen Taylor3473f882001-02-23 17:55:21 +00005276 /*
5277 * SAX: PI detected.
5278 */
5279 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5280 (ctxt->sax->processingInstruction != NULL))
5281 ctxt->sax->processingInstruction(ctxt->userData,
5282 target, buf);
5283 }
5284 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005285 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005286 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005287 }
Chris Evans77404b82011-12-14 16:18:25 +08005288 if (ctxt->instate != XML_PARSER_EOF)
5289 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005290 }
5291}
5292
5293/**
5294 * xmlParseNotationDecl:
5295 * @ctxt: an XML parser context
5296 *
5297 * parse a notation declaration
5298 *
5299 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5300 *
5301 * Hence there is actually 3 choices:
5302 * 'PUBLIC' S PubidLiteral
5303 * 'PUBLIC' S PubidLiteral S SystemLiteral
5304 * and 'SYSTEM' S SystemLiteral
5305 *
5306 * See the NOTE on xmlParseExternalID().
5307 */
5308
5309void
5310xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005311 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005312 xmlChar *Pubid;
5313 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005314
Daniel Veillarda07050d2003-10-19 14:46:32 +00005315 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005316 xmlParserInputPtr input = ctxt->input;
5317 SHRINK;
5318 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005319 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005320 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5321 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005322 return;
5323 }
5324 SKIP_BLANKS;
5325
Daniel Veillard76d66f42001-05-16 21:05:17 +00005326 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005327 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005328 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005329 return;
5330 }
William M. Brack76e95df2003-10-18 16:20:14 +00005331 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005332 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005333 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005334 return;
5335 }
Daniel Veillard37334572008-07-31 08:20:02 +00005336 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005337 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005338 "colon are forbidden from notation names '%s'\n",
5339 name, NULL, NULL);
5340 }
Owen Taylor3473f882001-02-23 17:55:21 +00005341 SKIP_BLANKS;
5342
5343 /*
5344 * Parse the IDs.
5345 */
5346 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5347 SKIP_BLANKS;
5348
5349 if (RAW == '>') {
5350 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005351 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5352 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005353 }
5354 NEXT;
5355 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5356 (ctxt->sax->notationDecl != NULL))
5357 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5358 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005359 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005360 }
Owen Taylor3473f882001-02-23 17:55:21 +00005361 if (Systemid != NULL) xmlFree(Systemid);
5362 if (Pubid != NULL) xmlFree(Pubid);
5363 }
5364}
5365
5366/**
5367 * xmlParseEntityDecl:
5368 * @ctxt: an XML parser context
5369 *
5370 * parse <!ENTITY declarations
5371 *
5372 * [70] EntityDecl ::= GEDecl | PEDecl
5373 *
5374 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5375 *
5376 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5377 *
5378 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5379 *
5380 * [74] PEDef ::= EntityValue | ExternalID
5381 *
5382 * [76] NDataDecl ::= S 'NDATA' S Name
5383 *
5384 * [ VC: Notation Declared ]
5385 * The Name must match the declared name of a notation.
5386 */
5387
5388void
5389xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005390 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005391 xmlChar *value = NULL;
5392 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005393 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005394 int isParameter = 0;
5395 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005396 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005397
Daniel Veillard4c778d82005-01-23 17:37:44 +00005398 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005399 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005400 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005401 SHRINK;
5402 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005403 skipped = SKIP_BLANKS;
5404 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005405 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5406 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005407 }
Owen Taylor3473f882001-02-23 17:55:21 +00005408
5409 if (RAW == '%') {
5410 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005411 skipped = SKIP_BLANKS;
5412 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005413 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5414 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005415 }
Owen Taylor3473f882001-02-23 17:55:21 +00005416 isParameter = 1;
5417 }
5418
Daniel Veillard76d66f42001-05-16 21:05:17 +00005419 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005420 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005421 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5422 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005423 return;
5424 }
Daniel Veillard37334572008-07-31 08:20:02 +00005425 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005426 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005427 "colon are forbidden from entities names '%s'\n",
5428 name, NULL, NULL);
5429 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005430 skipped = SKIP_BLANKS;
5431 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005432 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5433 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005434 }
Owen Taylor3473f882001-02-23 17:55:21 +00005435
Daniel Veillardf5582f12002-06-11 10:08:16 +00005436 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005437 /*
5438 * handle the various case of definitions...
5439 */
5440 if (isParameter) {
5441 if ((RAW == '"') || (RAW == '\'')) {
5442 value = xmlParseEntityValue(ctxt, &orig);
5443 if (value) {
5444 if ((ctxt->sax != NULL) &&
5445 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5446 ctxt->sax->entityDecl(ctxt->userData, name,
5447 XML_INTERNAL_PARAMETER_ENTITY,
5448 NULL, NULL, value);
5449 }
5450 } else {
5451 URI = xmlParseExternalID(ctxt, &literal, 1);
5452 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005453 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005454 }
5455 if (URI) {
5456 xmlURIPtr uri;
5457
5458 uri = xmlParseURI((const char *) URI);
5459 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005460 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5461 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005462 /*
5463 * This really ought to be a well formedness error
5464 * but the XML Core WG decided otherwise c.f. issue
5465 * E26 of the XML erratas.
5466 */
Owen Taylor3473f882001-02-23 17:55:21 +00005467 } else {
5468 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005469 /*
5470 * Okay this is foolish to block those but not
5471 * invalid URIs.
5472 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005473 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005474 } else {
5475 if ((ctxt->sax != NULL) &&
5476 (!ctxt->disableSAX) &&
5477 (ctxt->sax->entityDecl != NULL))
5478 ctxt->sax->entityDecl(ctxt->userData, name,
5479 XML_EXTERNAL_PARAMETER_ENTITY,
5480 literal, URI, NULL);
5481 }
5482 xmlFreeURI(uri);
5483 }
5484 }
5485 }
5486 } else {
5487 if ((RAW == '"') || (RAW == '\'')) {
5488 value = xmlParseEntityValue(ctxt, &orig);
5489 if ((ctxt->sax != NULL) &&
5490 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5491 ctxt->sax->entityDecl(ctxt->userData, name,
5492 XML_INTERNAL_GENERAL_ENTITY,
5493 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005494 /*
5495 * For expat compatibility in SAX mode.
5496 */
5497 if ((ctxt->myDoc == NULL) ||
5498 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5499 if (ctxt->myDoc == NULL) {
5500 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005501 if (ctxt->myDoc == NULL) {
5502 xmlErrMemory(ctxt, "New Doc failed");
5503 return;
5504 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005505 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005506 }
5507 if (ctxt->myDoc->intSubset == NULL)
5508 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5509 BAD_CAST "fake", NULL, NULL);
5510
Daniel Veillard1af9a412003-08-20 22:54:39 +00005511 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5512 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005513 }
Owen Taylor3473f882001-02-23 17:55:21 +00005514 } else {
5515 URI = xmlParseExternalID(ctxt, &literal, 1);
5516 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005517 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005518 }
5519 if (URI) {
5520 xmlURIPtr uri;
5521
5522 uri = xmlParseURI((const char *)URI);
5523 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005524 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5525 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005526 /*
5527 * This really ought to be a well formedness error
5528 * but the XML Core WG decided otherwise c.f. issue
5529 * E26 of the XML erratas.
5530 */
Owen Taylor3473f882001-02-23 17:55:21 +00005531 } else {
5532 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005533 /*
5534 * Okay this is foolish to block those but not
5535 * invalid URIs.
5536 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005537 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005538 }
5539 xmlFreeURI(uri);
5540 }
5541 }
William M. Brack76e95df2003-10-18 16:20:14 +00005542 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005543 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5544 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005545 }
5546 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005547 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005548 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005549 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005550 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5551 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005552 }
5553 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005554 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005555 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5556 (ctxt->sax->unparsedEntityDecl != NULL))
5557 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5558 literal, URI, ndata);
5559 } else {
5560 if ((ctxt->sax != NULL) &&
5561 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5562 ctxt->sax->entityDecl(ctxt->userData, name,
5563 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5564 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005565 /*
5566 * For expat compatibility in SAX mode.
5567 * assuming the entity repalcement was asked for
5568 */
5569 if ((ctxt->replaceEntities != 0) &&
5570 ((ctxt->myDoc == NULL) ||
5571 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5572 if (ctxt->myDoc == NULL) {
5573 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005574 if (ctxt->myDoc == NULL) {
5575 xmlErrMemory(ctxt, "New Doc failed");
5576 return;
5577 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005578 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005579 }
5580
5581 if (ctxt->myDoc->intSubset == NULL)
5582 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5583 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005584 xmlSAX2EntityDecl(ctxt, name,
5585 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5586 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005587 }
Owen Taylor3473f882001-02-23 17:55:21 +00005588 }
5589 }
5590 }
5591 SKIP_BLANKS;
5592 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005593 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005594 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005595 } else {
5596 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005597 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5598 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005599 }
5600 NEXT;
5601 }
5602 if (orig != NULL) {
5603 /*
5604 * Ugly mechanism to save the raw entity value.
5605 */
5606 xmlEntityPtr cur = NULL;
5607
5608 if (isParameter) {
5609 if ((ctxt->sax != NULL) &&
5610 (ctxt->sax->getParameterEntity != NULL))
5611 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5612 } else {
5613 if ((ctxt->sax != NULL) &&
5614 (ctxt->sax->getEntity != NULL))
5615 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005616 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005617 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005618 }
Owen Taylor3473f882001-02-23 17:55:21 +00005619 }
5620 if (cur != NULL) {
5621 if (cur->orig != NULL)
5622 xmlFree(orig);
5623 else
5624 cur->orig = orig;
5625 } else
5626 xmlFree(orig);
5627 }
Owen Taylor3473f882001-02-23 17:55:21 +00005628 if (value != NULL) xmlFree(value);
5629 if (URI != NULL) xmlFree(URI);
5630 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005631 }
5632}
5633
5634/**
5635 * xmlParseDefaultDecl:
5636 * @ctxt: an XML parser context
5637 * @value: Receive a possible fixed default value for the attribute
5638 *
5639 * Parse an attribute default declaration
5640 *
5641 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5642 *
5643 * [ VC: Required Attribute ]
5644 * if the default declaration is the keyword #REQUIRED, then the
5645 * attribute must be specified for all elements of the type in the
5646 * attribute-list declaration.
5647 *
5648 * [ VC: Attribute Default Legal ]
5649 * The declared default value must meet the lexical constraints of
5650 * the declared attribute type c.f. xmlValidateAttributeDecl()
5651 *
5652 * [ VC: Fixed Attribute Default ]
5653 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005654 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005655 *
5656 * [ WFC: No < in Attribute Values ]
5657 * handled in xmlParseAttValue()
5658 *
5659 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005660 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005661 */
5662
5663int
5664xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5665 int val;
5666 xmlChar *ret;
5667
5668 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005669 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005670 SKIP(9);
5671 return(XML_ATTRIBUTE_REQUIRED);
5672 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005673 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005674 SKIP(8);
5675 return(XML_ATTRIBUTE_IMPLIED);
5676 }
5677 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005678 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005679 SKIP(6);
5680 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005681 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005682 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5683 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005684 }
5685 SKIP_BLANKS;
5686 }
5687 ret = xmlParseAttValue(ctxt);
5688 ctxt->instate = XML_PARSER_DTD;
5689 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005690 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005691 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005692 } else
5693 *value = ret;
5694 return(val);
5695}
5696
5697/**
5698 * xmlParseNotationType:
5699 * @ctxt: an XML parser context
5700 *
5701 * parse an Notation attribute type.
5702 *
5703 * Note: the leading 'NOTATION' S part has already being parsed...
5704 *
5705 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5706 *
5707 * [ VC: Notation Attributes ]
5708 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005709 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005710 *
5711 * Returns: the notation attribute tree built while parsing
5712 */
5713
5714xmlEnumerationPtr
5715xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005716 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005717 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005718
5719 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005720 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005721 return(NULL);
5722 }
5723 SHRINK;
5724 do {
5725 NEXT;
5726 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005727 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005728 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005729 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5730 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005731 xmlFreeEnumeration(ret);
5732 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005733 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005734 tmp = ret;
5735 while (tmp != NULL) {
5736 if (xmlStrEqual(name, tmp->name)) {
5737 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5738 "standalone: attribute notation value token %s duplicated\n",
5739 name, NULL);
5740 if (!xmlDictOwns(ctxt->dict, name))
5741 xmlFree((xmlChar *) name);
5742 break;
5743 }
5744 tmp = tmp->next;
5745 }
5746 if (tmp == NULL) {
5747 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005748 if (cur == NULL) {
5749 xmlFreeEnumeration(ret);
5750 return(NULL);
5751 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005752 if (last == NULL) ret = last = cur;
5753 else {
5754 last->next = cur;
5755 last = cur;
5756 }
Owen Taylor3473f882001-02-23 17:55:21 +00005757 }
5758 SKIP_BLANKS;
5759 } while (RAW == '|');
5760 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005761 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005762 xmlFreeEnumeration(ret);
5763 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005764 }
5765 NEXT;
5766 return(ret);
5767}
5768
5769/**
5770 * xmlParseEnumerationType:
5771 * @ctxt: an XML parser context
5772 *
5773 * parse an Enumeration attribute type.
5774 *
5775 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5776 *
5777 * [ VC: Enumeration ]
5778 * Values of this type must match one of the Nmtoken tokens in
5779 * the declaration
5780 *
5781 * Returns: the enumeration attribute tree built while parsing
5782 */
5783
5784xmlEnumerationPtr
5785xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5786 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005787 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005788
5789 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005790 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005791 return(NULL);
5792 }
5793 SHRINK;
5794 do {
5795 NEXT;
5796 SKIP_BLANKS;
5797 name = xmlParseNmtoken(ctxt);
5798 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005799 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005800 return(ret);
5801 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005802 tmp = ret;
5803 while (tmp != NULL) {
5804 if (xmlStrEqual(name, tmp->name)) {
5805 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5806 "standalone: attribute enumeration value token %s duplicated\n",
5807 name, NULL);
5808 if (!xmlDictOwns(ctxt->dict, name))
5809 xmlFree(name);
5810 break;
5811 }
5812 tmp = tmp->next;
5813 }
5814 if (tmp == NULL) {
5815 cur = xmlCreateEnumeration(name);
5816 if (!xmlDictOwns(ctxt->dict, name))
5817 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005818 if (cur == NULL) {
5819 xmlFreeEnumeration(ret);
5820 return(NULL);
5821 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005822 if (last == NULL) ret = last = cur;
5823 else {
5824 last->next = cur;
5825 last = cur;
5826 }
Owen Taylor3473f882001-02-23 17:55:21 +00005827 }
5828 SKIP_BLANKS;
5829 } while (RAW == '|');
5830 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005831 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005832 return(ret);
5833 }
5834 NEXT;
5835 return(ret);
5836}
5837
5838/**
5839 * xmlParseEnumeratedType:
5840 * @ctxt: an XML parser context
5841 * @tree: the enumeration tree built while parsing
5842 *
5843 * parse an Enumerated attribute type.
5844 *
5845 * [57] EnumeratedType ::= NotationType | Enumeration
5846 *
5847 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5848 *
5849 *
5850 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5851 */
5852
5853int
5854xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005855 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005856 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005857 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005858 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5859 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005860 return(0);
5861 }
5862 SKIP_BLANKS;
5863 *tree = xmlParseNotationType(ctxt);
5864 if (*tree == NULL) return(0);
5865 return(XML_ATTRIBUTE_NOTATION);
5866 }
5867 *tree = xmlParseEnumerationType(ctxt);
5868 if (*tree == NULL) return(0);
5869 return(XML_ATTRIBUTE_ENUMERATION);
5870}
5871
5872/**
5873 * xmlParseAttributeType:
5874 * @ctxt: an XML parser context
5875 * @tree: the enumeration tree built while parsing
5876 *
5877 * parse the Attribute list def for an element
5878 *
5879 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5880 *
5881 * [55] StringType ::= 'CDATA'
5882 *
5883 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5884 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5885 *
5886 * Validity constraints for attribute values syntax are checked in
5887 * xmlValidateAttributeValue()
5888 *
5889 * [ VC: ID ]
5890 * Values of type ID must match the Name production. A name must not
5891 * appear more than once in an XML document as a value of this type;
5892 * i.e., ID values must uniquely identify the elements which bear them.
5893 *
5894 * [ VC: One ID per Element Type ]
5895 * No element type may have more than one ID attribute specified.
5896 *
5897 * [ VC: ID Attribute Default ]
5898 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5899 *
5900 * [ VC: IDREF ]
5901 * Values of type IDREF must match the Name production, and values
5902 * of type IDREFS must match Names; each IDREF Name must match the value
5903 * of an ID attribute on some element in the XML document; i.e. IDREF
5904 * values must match the value of some ID attribute.
5905 *
5906 * [ VC: Entity Name ]
5907 * Values of type ENTITY must match the Name production, values
5908 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005909 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005910 *
5911 * [ VC: Name Token ]
5912 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005913 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005914 *
5915 * Returns the attribute type
5916 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005917int
Owen Taylor3473f882001-02-23 17:55:21 +00005918xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5919 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005920 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005921 SKIP(5);
5922 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005923 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005924 SKIP(6);
5925 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005926 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005927 SKIP(5);
5928 return(XML_ATTRIBUTE_IDREF);
5929 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5930 SKIP(2);
5931 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005932 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005933 SKIP(6);
5934 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005935 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005936 SKIP(8);
5937 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005938 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005939 SKIP(8);
5940 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005941 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005942 SKIP(7);
5943 return(XML_ATTRIBUTE_NMTOKEN);
5944 }
5945 return(xmlParseEnumeratedType(ctxt, tree));
5946}
5947
5948/**
5949 * xmlParseAttributeListDecl:
5950 * @ctxt: an XML parser context
5951 *
5952 * : parse the Attribute list def for an element
5953 *
5954 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5955 *
5956 * [53] AttDef ::= S Name S AttType S DefaultDecl
5957 *
5958 */
5959void
5960xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005961 const xmlChar *elemName;
5962 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005963 xmlEnumerationPtr tree;
5964
Daniel Veillarda07050d2003-10-19 14:46:32 +00005965 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005966 xmlParserInputPtr input = ctxt->input;
5967
5968 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005969 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005970 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005971 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005972 }
5973 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005974 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005975 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005976 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5977 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005978 return;
5979 }
5980 SKIP_BLANKS;
5981 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005982 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005983 const xmlChar *check = CUR_PTR;
5984 int type;
5985 int def;
5986 xmlChar *defaultValue = NULL;
5987
5988 GROW;
5989 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005990 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005991 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005992 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5993 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005994 break;
5995 }
5996 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005997 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005998 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005999 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006000 break;
6001 }
6002 SKIP_BLANKS;
6003
6004 type = xmlParseAttributeType(ctxt, &tree);
6005 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006006 break;
6007 }
6008
6009 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006010 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006011 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6012 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006013 if (tree != NULL)
6014 xmlFreeEnumeration(tree);
6015 break;
6016 }
6017 SKIP_BLANKS;
6018
6019 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6020 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006021 if (defaultValue != NULL)
6022 xmlFree(defaultValue);
6023 if (tree != NULL)
6024 xmlFreeEnumeration(tree);
6025 break;
6026 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006027 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6028 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006029
6030 GROW;
6031 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006032 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006033 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006034 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006035 if (defaultValue != NULL)
6036 xmlFree(defaultValue);
6037 if (tree != NULL)
6038 xmlFreeEnumeration(tree);
6039 break;
6040 }
6041 SKIP_BLANKS;
6042 }
6043 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006044 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6045 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006046 if (defaultValue != NULL)
6047 xmlFree(defaultValue);
6048 if (tree != NULL)
6049 xmlFreeEnumeration(tree);
6050 break;
6051 }
6052 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6053 (ctxt->sax->attributeDecl != NULL))
6054 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6055 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006056 else if (tree != NULL)
6057 xmlFreeEnumeration(tree);
6058
6059 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006060 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006061 (def != XML_ATTRIBUTE_REQUIRED)) {
6062 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6063 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006064 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006065 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6066 }
Owen Taylor3473f882001-02-23 17:55:21 +00006067 if (defaultValue != NULL)
6068 xmlFree(defaultValue);
6069 GROW;
6070 }
6071 if (RAW == '>') {
6072 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006073 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6074 "Attribute list declaration doesn't start and stop in the same entity\n",
6075 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006076 }
6077 NEXT;
6078 }
Owen Taylor3473f882001-02-23 17:55:21 +00006079 }
6080}
6081
6082/**
6083 * xmlParseElementMixedContentDecl:
6084 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006085 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006086 *
6087 * parse the declaration for a Mixed Element content
6088 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006089 *
Owen Taylor3473f882001-02-23 17:55:21 +00006090 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6091 * '(' S? '#PCDATA' S? ')'
6092 *
6093 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6094 *
6095 * [ VC: No Duplicate Types ]
6096 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006097 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006098 *
6099 * returns: the list of the xmlElementContentPtr describing the element choices
6100 */
6101xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006102xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006103 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006104 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006105
6106 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006107 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006108 SKIP(7);
6109 SKIP_BLANKS;
6110 SHRINK;
6111 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006112 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006113 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6114"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006115 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006116 }
Owen Taylor3473f882001-02-23 17:55:21 +00006117 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006118 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006119 if (ret == NULL)
6120 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006121 if (RAW == '*') {
6122 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6123 NEXT;
6124 }
6125 return(ret);
6126 }
6127 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006128 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006129 if (ret == NULL) return(NULL);
6130 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006131 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006132 NEXT;
6133 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006134 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006135 if (ret == NULL) return(NULL);
6136 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006137 if (cur != NULL)
6138 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006139 cur = ret;
6140 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006141 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006142 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006143 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006144 if (n->c1 != NULL)
6145 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006146 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006147 if (n != NULL)
6148 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006149 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006150 }
6151 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006152 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006154 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006155 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006156 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006157 return(NULL);
6158 }
6159 SKIP_BLANKS;
6160 GROW;
6161 }
6162 if ((RAW == ')') && (NXT(1) == '*')) {
6163 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006164 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006165 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006166 if (cur->c2 != NULL)
6167 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006168 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006169 if (ret != NULL)
6170 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006171 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006172 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6173"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006174 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006175 }
Owen Taylor3473f882001-02-23 17:55:21 +00006176 SKIP(2);
6177 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006178 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006179 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006180 return(NULL);
6181 }
6182
6183 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006184 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006185 }
6186 return(ret);
6187}
6188
6189/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006190 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006191 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006192 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006193 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006194 *
6195 * parse the declaration for a Mixed Element content
6196 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006197 *
Owen Taylor3473f882001-02-23 17:55:21 +00006198 *
6199 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6200 *
6201 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6202 *
6203 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6204 *
6205 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6206 *
6207 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6208 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006209 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006210 * opening or closing parentheses in a choice, seq, or Mixed
6211 * construct is contained in the replacement text for a parameter
6212 * entity, both must be contained in the same replacement text. For
6213 * interoperability, if a parameter-entity reference appears in a
6214 * choice, seq, or Mixed construct, its replacement text should not
6215 * be empty, and neither the first nor last non-blank character of
6216 * the replacement text should be a connector (| or ,).
6217 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006218 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006219 * hierarchy.
6220 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006221static xmlElementContentPtr
6222xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6223 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006224 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006225 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006226 xmlChar type = 0;
6227
Daniel Veillard489f9672009-08-10 16:49:30 +02006228 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6229 (depth > 2048)) {
6230 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6231"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6232 depth);
6233 return(NULL);
6234 }
Owen Taylor3473f882001-02-23 17:55:21 +00006235 SKIP_BLANKS;
6236 GROW;
6237 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006238 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006239
Owen Taylor3473f882001-02-23 17:55:21 +00006240 /* Recurse on first child */
6241 NEXT;
6242 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006243 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6244 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006245 SKIP_BLANKS;
6246 GROW;
6247 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006248 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006249 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006250 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006251 return(NULL);
6252 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006253 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006254 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006255 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006256 return(NULL);
6257 }
Owen Taylor3473f882001-02-23 17:55:21 +00006258 GROW;
6259 if (RAW == '?') {
6260 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6261 NEXT;
6262 } else if (RAW == '*') {
6263 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6264 NEXT;
6265 } else if (RAW == '+') {
6266 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6267 NEXT;
6268 } else {
6269 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6270 }
Owen Taylor3473f882001-02-23 17:55:21 +00006271 GROW;
6272 }
6273 SKIP_BLANKS;
6274 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006275 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006276 /*
6277 * Each loop we parse one separator and one element.
6278 */
6279 if (RAW == ',') {
6280 if (type == 0) type = CUR;
6281
6282 /*
6283 * Detect "Name | Name , Name" error
6284 */
6285 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006286 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006287 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006288 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006289 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006290 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006291 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006292 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006293 return(NULL);
6294 }
6295 NEXT;
6296
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006297 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006298 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006299 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006300 xmlFreeDocElementContent(ctxt->myDoc, last);
6301 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006302 return(NULL);
6303 }
6304 if (last == NULL) {
6305 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006306 if (ret != NULL)
6307 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006308 ret = cur = op;
6309 } else {
6310 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006311 if (op != NULL)
6312 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006313 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006314 if (last != NULL)
6315 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006316 cur =op;
6317 last = NULL;
6318 }
6319 } else if (RAW == '|') {
6320 if (type == 0) type = CUR;
6321
6322 /*
6323 * Detect "Name , Name | Name" error
6324 */
6325 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006326 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006327 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006328 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006329 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006330 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006331 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006332 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006333 return(NULL);
6334 }
6335 NEXT;
6336
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006337 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006338 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006339 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006340 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006341 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006342 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006343 return(NULL);
6344 }
6345 if (last == NULL) {
6346 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006347 if (ret != NULL)
6348 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006349 ret = cur = op;
6350 } else {
6351 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006352 if (op != NULL)
6353 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006354 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006355 if (last != NULL)
6356 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006357 cur =op;
6358 last = NULL;
6359 }
6360 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006361 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006362 if ((last != NULL) && (last != ret))
6363 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006364 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006365 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006366 return(NULL);
6367 }
6368 GROW;
6369 SKIP_BLANKS;
6370 GROW;
6371 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006372 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006373 /* Recurse on second child */
6374 NEXT;
6375 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006376 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6377 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006378 SKIP_BLANKS;
6379 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006380 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006381 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006382 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006383 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006384 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006385 return(NULL);
6386 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006387 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006388 if (last == NULL) {
6389 if (ret != NULL)
6390 xmlFreeDocElementContent(ctxt->myDoc, ret);
6391 return(NULL);
6392 }
Owen Taylor3473f882001-02-23 17:55:21 +00006393 if (RAW == '?') {
6394 last->ocur = XML_ELEMENT_CONTENT_OPT;
6395 NEXT;
6396 } else if (RAW == '*') {
6397 last->ocur = XML_ELEMENT_CONTENT_MULT;
6398 NEXT;
6399 } else if (RAW == '+') {
6400 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6401 NEXT;
6402 } else {
6403 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6404 }
6405 }
6406 SKIP_BLANKS;
6407 GROW;
6408 }
6409 if ((cur != NULL) && (last != NULL)) {
6410 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006411 if (last != NULL)
6412 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006413 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006414 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006415 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6416"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006417 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006418 }
Owen Taylor3473f882001-02-23 17:55:21 +00006419 NEXT;
6420 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006421 if (ret != NULL) {
6422 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6423 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6424 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6425 else
6426 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6427 }
Owen Taylor3473f882001-02-23 17:55:21 +00006428 NEXT;
6429 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006430 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006431 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006432 cur = ret;
6433 /*
6434 * Some normalization:
6435 * (a | b* | c?)* == (a | b | c)*
6436 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006437 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006438 if ((cur->c1 != NULL) &&
6439 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6440 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6441 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6442 if ((cur->c2 != NULL) &&
6443 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6444 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6445 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6446 cur = cur->c2;
6447 }
6448 }
Owen Taylor3473f882001-02-23 17:55:21 +00006449 NEXT;
6450 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006451 if (ret != NULL) {
6452 int found = 0;
6453
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006454 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6456 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006457 else
6458 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006459 /*
6460 * Some normalization:
6461 * (a | b*)+ == (a | b)*
6462 * (a | b?)+ == (a | b)*
6463 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006464 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006465 if ((cur->c1 != NULL) &&
6466 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6467 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6468 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6469 found = 1;
6470 }
6471 if ((cur->c2 != NULL) &&
6472 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6473 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6474 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6475 found = 1;
6476 }
6477 cur = cur->c2;
6478 }
6479 if (found)
6480 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6481 }
Owen Taylor3473f882001-02-23 17:55:21 +00006482 NEXT;
6483 }
6484 return(ret);
6485}
6486
6487/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006488 * xmlParseElementChildrenContentDecl:
6489 * @ctxt: an XML parser context
6490 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006491 *
6492 * parse the declaration for a Mixed Element content
6493 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6494 *
6495 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6496 *
6497 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6498 *
6499 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6500 *
6501 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6502 *
6503 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6504 * TODO Parameter-entity replacement text must be properly nested
6505 * with parenthesized groups. That is to say, if either of the
6506 * opening or closing parentheses in a choice, seq, or Mixed
6507 * construct is contained in the replacement text for a parameter
6508 * entity, both must be contained in the same replacement text. For
6509 * interoperability, if a parameter-entity reference appears in a
6510 * choice, seq, or Mixed construct, its replacement text should not
6511 * be empty, and neither the first nor last non-blank character of
6512 * the replacement text should be a connector (| or ,).
6513 *
6514 * Returns the tree of xmlElementContentPtr describing the element
6515 * hierarchy.
6516 */
6517xmlElementContentPtr
6518xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6519 /* stub left for API/ABI compat */
6520 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6521}
6522
6523/**
Owen Taylor3473f882001-02-23 17:55:21 +00006524 * xmlParseElementContentDecl:
6525 * @ctxt: an XML parser context
6526 * @name: the name of the element being defined.
6527 * @result: the Element Content pointer will be stored here if any
6528 *
6529 * parse the declaration for an Element content either Mixed or Children,
6530 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006531 *
Owen Taylor3473f882001-02-23 17:55:21 +00006532 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6533 *
6534 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6535 */
6536
6537int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006538xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006539 xmlElementContentPtr *result) {
6540
6541 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006542 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006543 int res;
6544
6545 *result = NULL;
6546
6547 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006548 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006549 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006550 return(-1);
6551 }
6552 NEXT;
6553 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006554 if (ctxt->instate == XML_PARSER_EOF)
6555 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006556 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006557 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006558 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006559 res = XML_ELEMENT_TYPE_MIXED;
6560 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006561 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006562 res = XML_ELEMENT_TYPE_ELEMENT;
6563 }
Owen Taylor3473f882001-02-23 17:55:21 +00006564 SKIP_BLANKS;
6565 *result = tree;
6566 return(res);
6567}
6568
6569/**
6570 * xmlParseElementDecl:
6571 * @ctxt: an XML parser context
6572 *
6573 * parse an Element declaration.
6574 *
6575 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6576 *
6577 * [ VC: Unique Element Type Declaration ]
6578 * No element type may be declared more than once
6579 *
6580 * Returns the type of the element, or -1 in case of error
6581 */
6582int
6583xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006584 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006585 int ret = -1;
6586 xmlElementContentPtr content = NULL;
6587
Daniel Veillard4c778d82005-01-23 17:37:44 +00006588 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006589 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006590 xmlParserInputPtr input = ctxt->input;
6591
6592 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006593 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006594 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6595 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006596 }
6597 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006598 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006599 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006600 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6601 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006602 return(-1);
6603 }
6604 while ((RAW == 0) && (ctxt->inputNr > 1))
6605 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006606 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006607 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6608 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006609 }
6610 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006611 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006612 SKIP(5);
6613 /*
6614 * Element must always be empty.
6615 */
6616 ret = XML_ELEMENT_TYPE_EMPTY;
6617 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6618 (NXT(2) == 'Y')) {
6619 SKIP(3);
6620 /*
6621 * Element is a generic container.
6622 */
6623 ret = XML_ELEMENT_TYPE_ANY;
6624 } else if (RAW == '(') {
6625 ret = xmlParseElementContentDecl(ctxt, name, &content);
6626 } else {
6627 /*
6628 * [ WFC: PEs in Internal Subset ] error handling.
6629 */
6630 if ((RAW == '%') && (ctxt->external == 0) &&
6631 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006632 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006633 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006634 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006635 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006636 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6637 }
Owen Taylor3473f882001-02-23 17:55:21 +00006638 return(-1);
6639 }
6640
6641 SKIP_BLANKS;
6642 /*
6643 * Pop-up of finished entities.
6644 */
6645 while ((RAW == 0) && (ctxt->inputNr > 1))
6646 xmlPopInput(ctxt);
6647 SKIP_BLANKS;
6648
6649 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006650 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006651 if (content != NULL) {
6652 xmlFreeDocElementContent(ctxt->myDoc, content);
6653 }
Owen Taylor3473f882001-02-23 17:55:21 +00006654 } else {
6655 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006656 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6657 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006659
Owen Taylor3473f882001-02-23 17:55:21 +00006660 NEXT;
6661 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006662 (ctxt->sax->elementDecl != NULL)) {
6663 if (content != NULL)
6664 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006665 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6666 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006667 if ((content != NULL) && (content->parent == NULL)) {
6668 /*
6669 * this is a trick: if xmlAddElementDecl is called,
6670 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006671 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006672 * interfaces or change the API/ABI
6673 */
6674 xmlFreeDocElementContent(ctxt->myDoc, content);
6675 }
6676 } else if (content != NULL) {
6677 xmlFreeDocElementContent(ctxt->myDoc, content);
6678 }
Owen Taylor3473f882001-02-23 17:55:21 +00006679 }
Owen Taylor3473f882001-02-23 17:55:21 +00006680 }
6681 return(ret);
6682}
6683
6684/**
Owen Taylor3473f882001-02-23 17:55:21 +00006685 * xmlParseConditionalSections
6686 * @ctxt: an XML parser context
6687 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006688 * [61] conditionalSect ::= includeSect | ignoreSect
6689 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006690 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6691 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6692 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6693 */
6694
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006695static void
Owen Taylor3473f882001-02-23 17:55:21 +00006696xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006697 int id = ctxt->input->id;
6698
Owen Taylor3473f882001-02-23 17:55:21 +00006699 SKIP(3);
6700 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006701 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006702 SKIP(7);
6703 SKIP_BLANKS;
6704 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006705 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006706 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006707 if (ctxt->input->id != id) {
6708 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6709 "All markup of the conditional section is not in the same entity\n",
6710 NULL, NULL);
6711 }
Owen Taylor3473f882001-02-23 17:55:21 +00006712 NEXT;
6713 }
6714 if (xmlParserDebugEntities) {
6715 if ((ctxt->input != NULL) && (ctxt->input->filename))
6716 xmlGenericError(xmlGenericErrorContext,
6717 "%s(%d): ", ctxt->input->filename,
6718 ctxt->input->line);
6719 xmlGenericError(xmlGenericErrorContext,
6720 "Entering INCLUDE Conditional Section\n");
6721 }
6722
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006723 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6724 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006725 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006726 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006727
6728 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6729 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006730 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006731 NEXT;
6732 } else if (RAW == '%') {
6733 xmlParsePEReference(ctxt);
6734 } else
6735 xmlParseMarkupDecl(ctxt);
6736
6737 /*
6738 * Pop-up of finished entities.
6739 */
6740 while ((RAW == 0) && (ctxt->inputNr > 1))
6741 xmlPopInput(ctxt);
6742
Daniel Veillardfdc91562002-07-01 21:52:03 +00006743 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006744 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006745 break;
6746 }
6747 }
6748 if (xmlParserDebugEntities) {
6749 if ((ctxt->input != NULL) && (ctxt->input->filename))
6750 xmlGenericError(xmlGenericErrorContext,
6751 "%s(%d): ", ctxt->input->filename,
6752 ctxt->input->line);
6753 xmlGenericError(xmlGenericErrorContext,
6754 "Leaving INCLUDE Conditional Section\n");
6755 }
6756
Daniel Veillarda07050d2003-10-19 14:46:32 +00006757 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006758 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006759 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006760 int depth = 0;
6761
6762 SKIP(6);
6763 SKIP_BLANKS;
6764 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006765 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006766 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006767 if (ctxt->input->id != id) {
6768 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6769 "All markup of the conditional section is not in the same entity\n",
6770 NULL, NULL);
6771 }
Owen Taylor3473f882001-02-23 17:55:21 +00006772 NEXT;
6773 }
6774 if (xmlParserDebugEntities) {
6775 if ((ctxt->input != NULL) && (ctxt->input->filename))
6776 xmlGenericError(xmlGenericErrorContext,
6777 "%s(%d): ", ctxt->input->filename,
6778 ctxt->input->line);
6779 xmlGenericError(xmlGenericErrorContext,
6780 "Entering IGNORE Conditional Section\n");
6781 }
6782
6783 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006784 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006785 * But disable SAX event generating DTD building in the meantime
6786 */
6787 state = ctxt->disableSAX;
6788 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006789 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006790 ctxt->instate = XML_PARSER_IGNORE;
6791
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006792 while (((depth >= 0) && (RAW != 0)) &&
6793 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006794 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6795 depth++;
6796 SKIP(3);
6797 continue;
6798 }
6799 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6800 if (--depth >= 0) SKIP(3);
6801 continue;
6802 }
6803 NEXT;
6804 continue;
6805 }
6806
6807 ctxt->disableSAX = state;
6808 ctxt->instate = instate;
6809
6810 if (xmlParserDebugEntities) {
6811 if ((ctxt->input != NULL) && (ctxt->input->filename))
6812 xmlGenericError(xmlGenericErrorContext,
6813 "%s(%d): ", ctxt->input->filename,
6814 ctxt->input->line);
6815 xmlGenericError(xmlGenericErrorContext,
6816 "Leaving IGNORE Conditional Section\n");
6817 }
6818
6819 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006820 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006821 }
6822
6823 if (RAW == 0)
6824 SHRINK;
6825
6826 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006827 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006828 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006829 if (ctxt->input->id != id) {
6830 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831 "All markup of the conditional section is not in the same entity\n",
6832 NULL, NULL);
6833 }
Owen Taylor3473f882001-02-23 17:55:21 +00006834 SKIP(3);
6835 }
6836}
6837
6838/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006839 * xmlParseMarkupDecl:
6840 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006841 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006842 * parse Markup declarations
6843 *
6844 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6845 * NotationDecl | PI | Comment
6846 *
6847 * [ VC: Proper Declaration/PE Nesting ]
6848 * Parameter-entity replacement text must be properly nested with
6849 * markup declarations. That is to say, if either the first character
6850 * or the last character of a markup declaration (markupdecl above) is
6851 * contained in the replacement text for a parameter-entity reference,
6852 * both must be contained in the same replacement text.
6853 *
6854 * [ WFC: PEs in Internal Subset ]
6855 * In the internal DTD subset, parameter-entity references can occur
6856 * only where markup declarations can occur, not within markup declarations.
6857 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006858 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006859 */
6860void
6861xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6862 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006863 if (CUR == '<') {
6864 if (NXT(1) == '!') {
6865 switch (NXT(2)) {
6866 case 'E':
6867 if (NXT(3) == 'L')
6868 xmlParseElementDecl(ctxt);
6869 else if (NXT(3) == 'N')
6870 xmlParseEntityDecl(ctxt);
6871 break;
6872 case 'A':
6873 xmlParseAttributeListDecl(ctxt);
6874 break;
6875 case 'N':
6876 xmlParseNotationDecl(ctxt);
6877 break;
6878 case '-':
6879 xmlParseComment(ctxt);
6880 break;
6881 default:
6882 /* there is an error but it will be detected later */
6883 break;
6884 }
6885 } else if (NXT(1) == '?') {
6886 xmlParsePI(ctxt);
6887 }
6888 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006889 /*
6890 * This is only for internal subset. On external entities,
6891 * the replacement is done before parsing stage
6892 */
6893 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6894 xmlParsePEReference(ctxt);
6895
6896 /*
6897 * Conditional sections are allowed from entities included
6898 * by PE References in the internal subset.
6899 */
6900 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6901 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6902 xmlParseConditionalSections(ctxt);
6903 }
6904 }
6905
6906 ctxt->instate = XML_PARSER_DTD;
6907}
6908
6909/**
6910 * xmlParseTextDecl:
6911 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006912 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006913 * parse an XML declaration header for external entities
6914 *
6915 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006916 */
6917
6918void
6919xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6920 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006921 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006922
6923 /*
6924 * We know that '<?xml' is here.
6925 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006926 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006927 SKIP(5);
6928 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006929 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006930 return;
6931 }
6932
William M. Brack76e95df2003-10-18 16:20:14 +00006933 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006934 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6935 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006936 }
6937 SKIP_BLANKS;
6938
6939 /*
6940 * We may have the VersionInfo here.
6941 */
6942 version = xmlParseVersionInfo(ctxt);
6943 if (version == NULL)
6944 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006945 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006946 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006947 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6948 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006949 }
6950 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006951 ctxt->input->version = version;
6952
6953 /*
6954 * We must have the encoding declaration
6955 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006956 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006957 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6958 /*
6959 * The XML REC instructs us to stop parsing right here
6960 */
6961 return;
6962 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006963 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6964 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6965 "Missing encoding in text declaration\n");
6966 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006967
6968 SKIP_BLANKS;
6969 if ((RAW == '?') && (NXT(1) == '>')) {
6970 SKIP(2);
6971 } else if (RAW == '>') {
6972 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006973 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006974 NEXT;
6975 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006976 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006977 MOVETO_ENDTAG(CUR_PTR);
6978 NEXT;
6979 }
6980}
6981
6982/**
Owen Taylor3473f882001-02-23 17:55:21 +00006983 * xmlParseExternalSubset:
6984 * @ctxt: an XML parser context
6985 * @ExternalID: the external identifier
6986 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006987 *
Owen Taylor3473f882001-02-23 17:55:21 +00006988 * parse Markup declarations from an external subset
6989 *
6990 * [30] extSubset ::= textDecl? extSubsetDecl
6991 *
6992 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6993 */
6994void
6995xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6996 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006997 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006998 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006999
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007000 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007001 (ctxt->input->end - ctxt->input->cur >= 4)) {
7002 xmlChar start[4];
7003 xmlCharEncoding enc;
7004
7005 start[0] = RAW;
7006 start[1] = NXT(1);
7007 start[2] = NXT(2);
7008 start[3] = NXT(3);
7009 enc = xmlDetectCharEncoding(start, 4);
7010 if (enc != XML_CHAR_ENCODING_NONE)
7011 xmlSwitchEncoding(ctxt, enc);
7012 }
7013
Daniel Veillarda07050d2003-10-19 14:46:32 +00007014 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007015 xmlParseTextDecl(ctxt);
7016 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7017 /*
7018 * The XML REC instructs us to stop parsing right here
7019 */
7020 ctxt->instate = XML_PARSER_EOF;
7021 return;
7022 }
7023 }
7024 if (ctxt->myDoc == NULL) {
7025 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007026 if (ctxt->myDoc == NULL) {
7027 xmlErrMemory(ctxt, "New Doc failed");
7028 return;
7029 }
7030 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007031 }
7032 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7033 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7034
7035 ctxt->instate = XML_PARSER_DTD;
7036 ctxt->external = 1;
7037 while (((RAW == '<') && (NXT(1) == '?')) ||
7038 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007039 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007040 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007041 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007042
7043 GROW;
7044 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7045 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007046 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007047 NEXT;
7048 } else if (RAW == '%') {
7049 xmlParsePEReference(ctxt);
7050 } else
7051 xmlParseMarkupDecl(ctxt);
7052
7053 /*
7054 * Pop-up of finished entities.
7055 */
7056 while ((RAW == 0) && (ctxt->inputNr > 1))
7057 xmlPopInput(ctxt);
7058
Daniel Veillardfdc91562002-07-01 21:52:03 +00007059 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007060 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007061 break;
7062 }
7063 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007064
Owen Taylor3473f882001-02-23 17:55:21 +00007065 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007066 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007067 }
7068
7069}
7070
7071/**
7072 * xmlParseReference:
7073 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007074 *
Owen Taylor3473f882001-02-23 17:55:21 +00007075 * parse and handle entity references in content, depending on the SAX
7076 * interface, this may end-up in a call to character() if this is a
7077 * CharRef, a predefined entity, if there is no reference() callback.
7078 * or if the parser was asked to switch to that mode.
7079 *
7080 * [67] Reference ::= EntityRef | CharRef
7081 */
7082void
7083xmlParseReference(xmlParserCtxtPtr ctxt) {
7084 xmlEntityPtr ent;
7085 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007086 int was_checked;
7087 xmlNodePtr list = NULL;
7088 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007089
Daniel Veillard0161e632008-08-28 15:36:32 +00007090
7091 if (RAW != '&')
7092 return;
7093
7094 /*
7095 * Simple case of a CharRef
7096 */
Owen Taylor3473f882001-02-23 17:55:21 +00007097 if (NXT(1) == '#') {
7098 int i = 0;
7099 xmlChar out[10];
7100 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007101 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007102
Daniel Veillarddc171602008-03-26 17:41:38 +00007103 if (value == 0)
7104 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007105 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7106 /*
7107 * So we are using non-UTF-8 buffers
7108 * Check that the char fit on 8bits, if not
7109 * generate a CharRef.
7110 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007111 if (value <= 0xFF) {
7112 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007113 out[1] = 0;
7114 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7115 (!ctxt->disableSAX))
7116 ctxt->sax->characters(ctxt->userData, out, 1);
7117 } else {
7118 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007119 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007120 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007121 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007122 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7123 (!ctxt->disableSAX))
7124 ctxt->sax->reference(ctxt->userData, out);
7125 }
7126 } else {
7127 /*
7128 * Just encode the value in UTF-8
7129 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007130 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007131 out[i] = 0;
7132 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7133 (!ctxt->disableSAX))
7134 ctxt->sax->characters(ctxt->userData, out, i);
7135 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007136 return;
7137 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007138
Daniel Veillard0161e632008-08-28 15:36:32 +00007139 /*
7140 * We are seeing an entity reference
7141 */
7142 ent = xmlParseEntityRef(ctxt);
7143 if (ent == NULL) return;
7144 if (!ctxt->wellFormed)
7145 return;
7146 was_checked = ent->checked;
7147
7148 /* special case of predefined entities */
7149 if ((ent->name == NULL) ||
7150 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7151 val = ent->content;
7152 if (val == NULL) return;
7153 /*
7154 * inline the entity.
7155 */
7156 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7157 (!ctxt->disableSAX))
7158 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7159 return;
7160 }
7161
7162 /*
7163 * The first reference to the entity trigger a parsing phase
7164 * where the ent->children is filled with the result from
7165 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007166 * Note: external parsed entities will not be loaded, it is not
7167 * required for a non-validating parser, unless the parsing option
7168 * of validating, or substituting entities were given. Doing so is
7169 * far more secure as the parser will only process data coming from
7170 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007171 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007172 if ((ent->checked == 0) &&
7173 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7174 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007175 unsigned long oldnbent = ctxt->nbentities;
7176
7177 /*
7178 * This is a bit hackish but this seems the best
7179 * way to make sure both SAX and DOM entity support
7180 * behaves okay.
7181 */
7182 void *user_data;
7183 if (ctxt->userData == ctxt)
7184 user_data = NULL;
7185 else
7186 user_data = ctxt->userData;
7187
7188 /*
7189 * Check that this entity is well formed
7190 * 4.3.2: An internal general parsed entity is well-formed
7191 * if its replacement text matches the production labeled
7192 * content.
7193 */
7194 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7195 ctxt->depth++;
7196 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7197 user_data, &list);
7198 ctxt->depth--;
7199
7200 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7201 ctxt->depth++;
7202 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7203 user_data, ctxt->depth, ent->URI,
7204 ent->ExternalID, &list);
7205 ctxt->depth--;
7206 } else {
7207 ret = XML_ERR_ENTITY_PE_INTERNAL;
7208 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7209 "invalid entity type found\n", NULL);
7210 }
7211
7212 /*
7213 * Store the number of entities needing parsing for this entity
7214 * content and do checkings
7215 */
7216 ent->checked = ctxt->nbentities - oldnbent;
7217 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007218 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007219 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007220 return;
7221 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007222 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007223 xmlFreeNodeList(list);
7224 return;
7225 }
Owen Taylor3473f882001-02-23 17:55:21 +00007226
Daniel Veillard0161e632008-08-28 15:36:32 +00007227 if ((ret == XML_ERR_OK) && (list != NULL)) {
7228 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7229 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7230 (ent->children == NULL)) {
7231 ent->children = list;
7232 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007233 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007234 * Prune it directly in the generated document
7235 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007236 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007237 if (((list->type == XML_TEXT_NODE) &&
7238 (list->next == NULL)) ||
7239 (ctxt->parseMode == XML_PARSE_READER)) {
7240 list->parent = (xmlNodePtr) ent;
7241 list = NULL;
7242 ent->owner = 1;
7243 } else {
7244 ent->owner = 0;
7245 while (list != NULL) {
7246 list->parent = (xmlNodePtr) ctxt->node;
7247 list->doc = ctxt->myDoc;
7248 if (list->next == NULL)
7249 ent->last = list;
7250 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007251 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007252 list = ent->children;
7253#ifdef LIBXML_LEGACY_ENABLED
7254 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7255 xmlAddEntityReference(ent, list, NULL);
7256#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007257 }
7258 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007259 ent->owner = 1;
7260 while (list != NULL) {
7261 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007262 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007263 if (list->next == NULL)
7264 ent->last = list;
7265 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007266 }
7267 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007268 } else {
7269 xmlFreeNodeList(list);
7270 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007271 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007272 } else if ((ret != XML_ERR_OK) &&
7273 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7274 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7275 "Entity '%s' failed to parse\n", ent->name);
7276 } else if (list != NULL) {
7277 xmlFreeNodeList(list);
7278 list = NULL;
7279 }
7280 if (ent->checked == 0)
7281 ent->checked = 1;
7282 } else if (ent->checked != 1) {
7283 ctxt->nbentities += ent->checked;
7284 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007285
Daniel Veillard0161e632008-08-28 15:36:32 +00007286 /*
7287 * Now that the entity content has been gathered
7288 * provide it to the application, this can take different forms based
7289 * on the parsing modes.
7290 */
7291 if (ent->children == NULL) {
7292 /*
7293 * Probably running in SAX mode and the callbacks don't
7294 * build the entity content. So unless we already went
7295 * though parsing for first checking go though the entity
7296 * content to generate callbacks associated to the entity
7297 */
7298 if (was_checked != 0) {
7299 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007300 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007301 * This is a bit hackish but this seems the best
7302 * way to make sure both SAX and DOM entity support
7303 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007304 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007305 if (ctxt->userData == ctxt)
7306 user_data = NULL;
7307 else
7308 user_data = ctxt->userData;
7309
7310 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7311 ctxt->depth++;
7312 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7313 ent->content, user_data, NULL);
7314 ctxt->depth--;
7315 } else if (ent->etype ==
7316 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7317 ctxt->depth++;
7318 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7319 ctxt->sax, user_data, ctxt->depth,
7320 ent->URI, ent->ExternalID, NULL);
7321 ctxt->depth--;
7322 } else {
7323 ret = XML_ERR_ENTITY_PE_INTERNAL;
7324 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7325 "invalid entity type found\n", NULL);
7326 }
7327 if (ret == XML_ERR_ENTITY_LOOP) {
7328 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7329 return;
7330 }
7331 }
7332 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7333 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7334 /*
7335 * Entity reference callback comes second, it's somewhat
7336 * superfluous but a compatibility to historical behaviour
7337 */
7338 ctxt->sax->reference(ctxt->userData, ent->name);
7339 }
7340 return;
7341 }
7342
7343 /*
7344 * If we didn't get any children for the entity being built
7345 */
7346 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7347 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7348 /*
7349 * Create a node.
7350 */
7351 ctxt->sax->reference(ctxt->userData, ent->name);
7352 return;
7353 }
7354
7355 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7356 /*
7357 * There is a problem on the handling of _private for entities
7358 * (bug 155816): Should we copy the content of the field from
7359 * the entity (possibly overwriting some value set by the user
7360 * when a copy is created), should we leave it alone, or should
7361 * we try to take care of different situations? The problem
7362 * is exacerbated by the usage of this field by the xmlReader.
7363 * To fix this bug, we look at _private on the created node
7364 * and, if it's NULL, we copy in whatever was in the entity.
7365 * If it's not NULL we leave it alone. This is somewhat of a
7366 * hack - maybe we should have further tests to determine
7367 * what to do.
7368 */
7369 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7370 /*
7371 * Seems we are generating the DOM content, do
7372 * a simple tree copy for all references except the first
7373 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007374 */
7375 if (((list == NULL) && (ent->owner == 0)) ||
7376 (ctxt->parseMode == XML_PARSE_READER)) {
7377 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7378
7379 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007380 * We are copying here, make sure there is no abuse
7381 */
7382 ctxt->sizeentcopy += ent->length;
7383 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7384 return;
7385
7386 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007387 * when operating on a reader, the entities definitions
7388 * are always owning the entities subtree.
7389 if (ctxt->parseMode == XML_PARSE_READER)
7390 ent->owner = 1;
7391 */
7392
7393 cur = ent->children;
7394 while (cur != NULL) {
7395 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7396 if (nw != NULL) {
7397 if (nw->_private == NULL)
7398 nw->_private = cur->_private;
7399 if (firstChild == NULL){
7400 firstChild = nw;
7401 }
7402 nw = xmlAddChild(ctxt->node, nw);
7403 }
7404 if (cur == ent->last) {
7405 /*
7406 * needed to detect some strange empty
7407 * node cases in the reader tests
7408 */
7409 if ((ctxt->parseMode == XML_PARSE_READER) &&
7410 (nw != NULL) &&
7411 (nw->type == XML_ELEMENT_NODE) &&
7412 (nw->children == NULL))
7413 nw->extra = 1;
7414
7415 break;
7416 }
7417 cur = cur->next;
7418 }
7419#ifdef LIBXML_LEGACY_ENABLED
7420 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7421 xmlAddEntityReference(ent, firstChild, nw);
7422#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007423 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007424 xmlNodePtr nw = NULL, cur, next, last,
7425 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007426
7427 /*
7428 * We are copying here, make sure there is no abuse
7429 */
7430 ctxt->sizeentcopy += ent->length;
7431 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7432 return;
7433
Daniel Veillard0161e632008-08-28 15:36:32 +00007434 /*
7435 * Copy the entity child list and make it the new
7436 * entity child list. The goal is to make sure any
7437 * ID or REF referenced will be the one from the
7438 * document content and not the entity copy.
7439 */
7440 cur = ent->children;
7441 ent->children = NULL;
7442 last = ent->last;
7443 ent->last = NULL;
7444 while (cur != NULL) {
7445 next = cur->next;
7446 cur->next = NULL;
7447 cur->parent = NULL;
7448 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7449 if (nw != NULL) {
7450 if (nw->_private == NULL)
7451 nw->_private = cur->_private;
7452 if (firstChild == NULL){
7453 firstChild = cur;
7454 }
7455 xmlAddChild((xmlNodePtr) ent, nw);
7456 xmlAddChild(ctxt->node, cur);
7457 }
7458 if (cur == last)
7459 break;
7460 cur = next;
7461 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007462 if (ent->owner == 0)
7463 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007464#ifdef LIBXML_LEGACY_ENABLED
7465 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7466 xmlAddEntityReference(ent, firstChild, nw);
7467#endif /* LIBXML_LEGACY_ENABLED */
7468 } else {
7469 const xmlChar *nbktext;
7470
7471 /*
7472 * the name change is to avoid coalescing of the
7473 * node with a possible previous text one which
7474 * would make ent->children a dangling pointer
7475 */
7476 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7477 -1);
7478 if (ent->children->type == XML_TEXT_NODE)
7479 ent->children->name = nbktext;
7480 if ((ent->last != ent->children) &&
7481 (ent->last->type == XML_TEXT_NODE))
7482 ent->last->name = nbktext;
7483 xmlAddChildList(ctxt->node, ent->children);
7484 }
7485
7486 /*
7487 * This is to avoid a nasty side effect, see
7488 * characters() in SAX.c
7489 */
7490 ctxt->nodemem = 0;
7491 ctxt->nodelen = 0;
7492 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007493 }
7494 }
7495}
7496
7497/**
7498 * xmlParseEntityRef:
7499 * @ctxt: an XML parser context
7500 *
7501 * parse ENTITY references declarations
7502 *
7503 * [68] EntityRef ::= '&' Name ';'
7504 *
7505 * [ WFC: Entity Declared ]
7506 * In a document without any DTD, a document with only an internal DTD
7507 * subset which contains no parameter entity references, or a document
7508 * with "standalone='yes'", the Name given in the entity reference
7509 * must match that in an entity declaration, except that well-formed
7510 * documents need not declare any of the following entities: amp, lt,
7511 * gt, apos, quot. The declaration of a parameter entity must precede
7512 * any reference to it. Similarly, the declaration of a general entity
7513 * must precede any reference to it which appears in a default value in an
7514 * attribute-list declaration. Note that if entities are declared in the
7515 * external subset or in external parameter entities, a non-validating
7516 * processor is not obligated to read and process their declarations;
7517 * for such documents, the rule that an entity must be declared is a
7518 * well-formedness constraint only if standalone='yes'.
7519 *
7520 * [ WFC: Parsed Entity ]
7521 * An entity reference must not contain the name of an unparsed entity
7522 *
7523 * Returns the xmlEntityPtr if found, or NULL otherwise.
7524 */
7525xmlEntityPtr
7526xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007527 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007528 xmlEntityPtr ent = NULL;
7529
7530 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007531 if (ctxt->instate == XML_PARSER_EOF)
7532 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007533
Daniel Veillard0161e632008-08-28 15:36:32 +00007534 if (RAW != '&')
7535 return(NULL);
7536 NEXT;
7537 name = xmlParseName(ctxt);
7538 if (name == NULL) {
7539 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7540 "xmlParseEntityRef: no name\n");
7541 return(NULL);
7542 }
7543 if (RAW != ';') {
7544 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7545 return(NULL);
7546 }
7547 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007548
Daniel Veillard0161e632008-08-28 15:36:32 +00007549 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007550 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007551 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007552 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7553 ent = xmlGetPredefinedEntity(name);
7554 if (ent != NULL)
7555 return(ent);
7556 }
Owen Taylor3473f882001-02-23 17:55:21 +00007557
Daniel Veillard0161e632008-08-28 15:36:32 +00007558 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007559 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007560 */
7561 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007562
Daniel Veillard0161e632008-08-28 15:36:32 +00007563 /*
7564 * Ask first SAX for entity resolution, otherwise try the
7565 * entities which may have stored in the parser context.
7566 */
7567 if (ctxt->sax != NULL) {
7568 if (ctxt->sax->getEntity != NULL)
7569 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007570 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007571 (ctxt->options & XML_PARSE_OLDSAX))
7572 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007573 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7574 (ctxt->userData==ctxt)) {
7575 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007576 }
7577 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007578 /*
7579 * [ WFC: Entity Declared ]
7580 * In a document without any DTD, a document with only an
7581 * internal DTD subset which contains no parameter entity
7582 * references, or a document with "standalone='yes'", the
7583 * Name given in the entity reference must match that in an
7584 * entity declaration, except that well-formed documents
7585 * need not declare any of the following entities: amp, lt,
7586 * gt, apos, quot.
7587 * The declaration of a parameter entity must precede any
7588 * reference to it.
7589 * Similarly, the declaration of a general entity must
7590 * precede any reference to it which appears in a default
7591 * value in an attribute-list declaration. Note that if
7592 * entities are declared in the external subset or in
7593 * external parameter entities, a non-validating processor
7594 * is not obligated to read and process their declarations;
7595 * for such documents, the rule that an entity must be
7596 * declared is a well-formedness constraint only if
7597 * standalone='yes'.
7598 */
7599 if (ent == NULL) {
7600 if ((ctxt->standalone == 1) ||
7601 ((ctxt->hasExternalSubset == 0) &&
7602 (ctxt->hasPErefs == 0))) {
7603 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7604 "Entity '%s' not defined\n", name);
7605 } else {
7606 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7607 "Entity '%s' not defined\n", name);
7608 if ((ctxt->inSubset == 0) &&
7609 (ctxt->sax != NULL) &&
7610 (ctxt->sax->reference != NULL)) {
7611 ctxt->sax->reference(ctxt->userData, name);
7612 }
7613 }
7614 ctxt->valid = 0;
7615 }
7616
7617 /*
7618 * [ WFC: Parsed Entity ]
7619 * An entity reference must not contain the name of an
7620 * unparsed entity
7621 */
7622 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7623 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7624 "Entity reference to unparsed entity %s\n", name);
7625 }
7626
7627 /*
7628 * [ WFC: No External Entity References ]
7629 * Attribute values cannot contain direct or indirect
7630 * entity references to external entities.
7631 */
7632 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7633 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7634 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7635 "Attribute references external entity '%s'\n", name);
7636 }
7637 /*
7638 * [ WFC: No < in Attribute Values ]
7639 * The replacement text of any entity referred to directly or
7640 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007641 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007642 */
7643 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7644 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007645 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007646 (xmlStrchr(ent->content, '<'))) {
7647 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7648 "'<' in entity '%s' is not allowed in attributes values\n", name);
7649 }
7650
7651 /*
7652 * Internal check, no parameter entities here ...
7653 */
7654 else {
7655 switch (ent->etype) {
7656 case XML_INTERNAL_PARAMETER_ENTITY:
7657 case XML_EXTERNAL_PARAMETER_ENTITY:
7658 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7659 "Attempt to reference the parameter entity '%s'\n",
7660 name);
7661 break;
7662 default:
7663 break;
7664 }
7665 }
7666
7667 /*
7668 * [ WFC: No Recursion ]
7669 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007670 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007671 * Done somewhere else
7672 */
Owen Taylor3473f882001-02-23 17:55:21 +00007673 return(ent);
7674}
7675
7676/**
7677 * xmlParseStringEntityRef:
7678 * @ctxt: an XML parser context
7679 * @str: a pointer to an index in the string
7680 *
7681 * parse ENTITY references declarations, but this version parses it from
7682 * a string value.
7683 *
7684 * [68] EntityRef ::= '&' Name ';'
7685 *
7686 * [ WFC: Entity Declared ]
7687 * In a document without any DTD, a document with only an internal DTD
7688 * subset which contains no parameter entity references, or a document
7689 * with "standalone='yes'", the Name given in the entity reference
7690 * must match that in an entity declaration, except that well-formed
7691 * documents need not declare any of the following entities: amp, lt,
7692 * gt, apos, quot. The declaration of a parameter entity must precede
7693 * any reference to it. Similarly, the declaration of a general entity
7694 * must precede any reference to it which appears in a default value in an
7695 * attribute-list declaration. Note that if entities are declared in the
7696 * external subset or in external parameter entities, a non-validating
7697 * processor is not obligated to read and process their declarations;
7698 * for such documents, the rule that an entity must be declared is a
7699 * well-formedness constraint only if standalone='yes'.
7700 *
7701 * [ WFC: Parsed Entity ]
7702 * An entity reference must not contain the name of an unparsed entity
7703 *
7704 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7705 * is updated to the current location in the string.
7706 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007707static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007708xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7709 xmlChar *name;
7710 const xmlChar *ptr;
7711 xmlChar cur;
7712 xmlEntityPtr ent = NULL;
7713
7714 if ((str == NULL) || (*str == NULL))
7715 return(NULL);
7716 ptr = *str;
7717 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007718 if (cur != '&')
7719 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007720
Daniel Veillard0161e632008-08-28 15:36:32 +00007721 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007722 name = xmlParseStringName(ctxt, &ptr);
7723 if (name == NULL) {
7724 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7725 "xmlParseStringEntityRef: no name\n");
7726 *str = ptr;
7727 return(NULL);
7728 }
7729 if (*ptr != ';') {
7730 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007731 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007732 *str = ptr;
7733 return(NULL);
7734 }
7735 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007736
Owen Taylor3473f882001-02-23 17:55:21 +00007737
Daniel Veillard0161e632008-08-28 15:36:32 +00007738 /*
7739 * Predefined entites override any extra definition
7740 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007741 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7742 ent = xmlGetPredefinedEntity(name);
7743 if (ent != NULL) {
7744 xmlFree(name);
7745 *str = ptr;
7746 return(ent);
7747 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007748 }
Owen Taylor3473f882001-02-23 17:55:21 +00007749
Daniel Veillard0161e632008-08-28 15:36:32 +00007750 /*
7751 * Increate the number of entity references parsed
7752 */
7753 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007754
Daniel Veillard0161e632008-08-28 15:36:32 +00007755 /*
7756 * Ask first SAX for entity resolution, otherwise try the
7757 * entities which may have stored in the parser context.
7758 */
7759 if (ctxt->sax != NULL) {
7760 if (ctxt->sax->getEntity != NULL)
7761 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007762 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7763 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007764 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7765 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007766 }
7767 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007768
7769 /*
7770 * [ WFC: Entity Declared ]
7771 * In a document without any DTD, a document with only an
7772 * internal DTD subset which contains no parameter entity
7773 * references, or a document with "standalone='yes'", the
7774 * Name given in the entity reference must match that in an
7775 * entity declaration, except that well-formed documents
7776 * need not declare any of the following entities: amp, lt,
7777 * gt, apos, quot.
7778 * The declaration of a parameter entity must precede any
7779 * reference to it.
7780 * Similarly, the declaration of a general entity must
7781 * precede any reference to it which appears in a default
7782 * value in an attribute-list declaration. Note that if
7783 * entities are declared in the external subset or in
7784 * external parameter entities, a non-validating processor
7785 * is not obligated to read and process their declarations;
7786 * for such documents, the rule that an entity must be
7787 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007788 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007789 */
7790 if (ent == NULL) {
7791 if ((ctxt->standalone == 1) ||
7792 ((ctxt->hasExternalSubset == 0) &&
7793 (ctxt->hasPErefs == 0))) {
7794 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7795 "Entity '%s' not defined\n", name);
7796 } else {
7797 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7798 "Entity '%s' not defined\n",
7799 name);
7800 }
7801 /* TODO ? check regressions ctxt->valid = 0; */
7802 }
7803
7804 /*
7805 * [ WFC: Parsed Entity ]
7806 * An entity reference must not contain the name of an
7807 * unparsed entity
7808 */
7809 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7810 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7811 "Entity reference to unparsed entity %s\n", name);
7812 }
7813
7814 /*
7815 * [ WFC: No External Entity References ]
7816 * Attribute values cannot contain direct or indirect
7817 * entity references to external entities.
7818 */
7819 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7820 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7821 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7822 "Attribute references external entity '%s'\n", name);
7823 }
7824 /*
7825 * [ WFC: No < in Attribute Values ]
7826 * The replacement text of any entity referred to directly or
7827 * indirectly in an attribute value (other than "&lt;") must
7828 * not contain a <.
7829 */
7830 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7831 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007832 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007833 (xmlStrchr(ent->content, '<'))) {
7834 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7835 "'<' in entity '%s' is not allowed in attributes values\n",
7836 name);
7837 }
7838
7839 /*
7840 * Internal check, no parameter entities here ...
7841 */
7842 else {
7843 switch (ent->etype) {
7844 case XML_INTERNAL_PARAMETER_ENTITY:
7845 case XML_EXTERNAL_PARAMETER_ENTITY:
7846 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7847 "Attempt to reference the parameter entity '%s'\n",
7848 name);
7849 break;
7850 default:
7851 break;
7852 }
7853 }
7854
7855 /*
7856 * [ WFC: No Recursion ]
7857 * A parsed entity must not contain a recursive reference
7858 * to itself, either directly or indirectly.
7859 * Done somewhere else
7860 */
7861
7862 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007863 *str = ptr;
7864 return(ent);
7865}
7866
7867/**
7868 * xmlParsePEReference:
7869 * @ctxt: an XML parser context
7870 *
7871 * parse PEReference declarations
7872 * The entity content is handled directly by pushing it's content as
7873 * a new input stream.
7874 *
7875 * [69] PEReference ::= '%' Name ';'
7876 *
7877 * [ WFC: No Recursion ]
7878 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007879 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007880 *
7881 * [ WFC: Entity Declared ]
7882 * In a document without any DTD, a document with only an internal DTD
7883 * subset which contains no parameter entity references, or a document
7884 * with "standalone='yes'", ... ... The declaration of a parameter
7885 * entity must precede any reference to it...
7886 *
7887 * [ VC: Entity Declared ]
7888 * In a document with an external subset or external parameter entities
7889 * with "standalone='no'", ... ... The declaration of a parameter entity
7890 * must precede any reference to it...
7891 *
7892 * [ WFC: In DTD ]
7893 * Parameter-entity references may only appear in the DTD.
7894 * NOTE: misleading but this is handled.
7895 */
7896void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007897xmlParsePEReference(xmlParserCtxtPtr ctxt)
7898{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007899 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007900 xmlEntityPtr entity = NULL;
7901 xmlParserInputPtr input;
7902
Daniel Veillard0161e632008-08-28 15:36:32 +00007903 if (RAW != '%')
7904 return;
7905 NEXT;
7906 name = xmlParseName(ctxt);
7907 if (name == NULL) {
7908 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7909 "xmlParsePEReference: no name\n");
7910 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007911 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007912 if (RAW != ';') {
7913 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7914 return;
7915 }
7916
7917 NEXT;
7918
7919 /*
7920 * Increate the number of entity references parsed
7921 */
7922 ctxt->nbentities++;
7923
7924 /*
7925 * Request the entity from SAX
7926 */
7927 if ((ctxt->sax != NULL) &&
7928 (ctxt->sax->getParameterEntity != NULL))
7929 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7930 name);
7931 if (entity == NULL) {
7932 /*
7933 * [ WFC: Entity Declared ]
7934 * In a document without any DTD, a document with only an
7935 * internal DTD subset which contains no parameter entity
7936 * references, or a document with "standalone='yes'", ...
7937 * ... The declaration of a parameter entity must precede
7938 * any reference to it...
7939 */
7940 if ((ctxt->standalone == 1) ||
7941 ((ctxt->hasExternalSubset == 0) &&
7942 (ctxt->hasPErefs == 0))) {
7943 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7944 "PEReference: %%%s; not found\n",
7945 name);
7946 } else {
7947 /*
7948 * [ VC: Entity Declared ]
7949 * In a document with an external subset or external
7950 * parameter entities with "standalone='no'", ...
7951 * ... The declaration of a parameter entity must
7952 * precede any reference to it...
7953 */
7954 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7955 "PEReference: %%%s; not found\n",
7956 name, NULL);
7957 ctxt->valid = 0;
7958 }
7959 } else {
7960 /*
7961 * Internal checking in case the entity quest barfed
7962 */
7963 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7964 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7965 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7966 "Internal: %%%s; is not a parameter entity\n",
7967 name, NULL);
7968 } else if (ctxt->input->free != deallocblankswrapper) {
7969 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7970 if (xmlPushInput(ctxt, input) < 0)
7971 return;
7972 } else {
7973 /*
7974 * TODO !!!
7975 * handle the extra spaces added before and after
7976 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7977 */
7978 input = xmlNewEntityInputStream(ctxt, entity);
7979 if (xmlPushInput(ctxt, input) < 0)
7980 return;
7981 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7982 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7983 (IS_BLANK_CH(NXT(5)))) {
7984 xmlParseTextDecl(ctxt);
7985 if (ctxt->errNo ==
7986 XML_ERR_UNSUPPORTED_ENCODING) {
7987 /*
7988 * The XML REC instructs us to stop parsing
7989 * right here
7990 */
7991 ctxt->instate = XML_PARSER_EOF;
7992 return;
7993 }
7994 }
7995 }
7996 }
7997 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007998}
7999
8000/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008001 * xmlLoadEntityContent:
8002 * @ctxt: an XML parser context
8003 * @entity: an unloaded system entity
8004 *
8005 * Load the original content of the given system entity from the
8006 * ExternalID/SystemID given. This is to be used for Included in Literal
8007 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8008 *
8009 * Returns 0 in case of success and -1 in case of failure
8010 */
8011static int
8012xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8013 xmlParserInputPtr input;
8014 xmlBufferPtr buf;
8015 int l, c;
8016 int count = 0;
8017
8018 if ((ctxt == NULL) || (entity == NULL) ||
8019 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8020 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8021 (entity->content != NULL)) {
8022 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8023 "xmlLoadEntityContent parameter error");
8024 return(-1);
8025 }
8026
8027 if (xmlParserDebugEntities)
8028 xmlGenericError(xmlGenericErrorContext,
8029 "Reading %s entity content input\n", entity->name);
8030
8031 buf = xmlBufferCreate();
8032 if (buf == NULL) {
8033 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8034 "xmlLoadEntityContent parameter error");
8035 return(-1);
8036 }
8037
8038 input = xmlNewEntityInputStream(ctxt, entity);
8039 if (input == NULL) {
8040 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8041 "xmlLoadEntityContent input error");
8042 xmlBufferFree(buf);
8043 return(-1);
8044 }
8045
8046 /*
8047 * Push the entity as the current input, read char by char
8048 * saving to the buffer until the end of the entity or an error
8049 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008050 if (xmlPushInput(ctxt, input) < 0) {
8051 xmlBufferFree(buf);
8052 return(-1);
8053 }
8054
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008055 GROW;
8056 c = CUR_CHAR(l);
8057 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8058 (IS_CHAR(c))) {
8059 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008060 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008061 count = 0;
8062 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008063 if (ctxt->instate == XML_PARSER_EOF) {
8064 xmlBufferFree(buf);
8065 return(-1);
8066 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008067 }
8068 NEXTL(l);
8069 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008070 if (c == 0) {
8071 count = 0;
8072 GROW;
8073 if (ctxt->instate == XML_PARSER_EOF) {
8074 xmlBufferFree(buf);
8075 return(-1);
8076 }
8077 c = CUR_CHAR(l);
8078 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008079 }
8080
8081 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8082 xmlPopInput(ctxt);
8083 } else if (!IS_CHAR(c)) {
8084 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8085 "xmlLoadEntityContent: invalid char value %d\n",
8086 c);
8087 xmlBufferFree(buf);
8088 return(-1);
8089 }
8090 entity->content = buf->content;
8091 buf->content = NULL;
8092 xmlBufferFree(buf);
8093
8094 return(0);
8095}
8096
8097/**
Owen Taylor3473f882001-02-23 17:55:21 +00008098 * xmlParseStringPEReference:
8099 * @ctxt: an XML parser context
8100 * @str: a pointer to an index in the string
8101 *
8102 * parse PEReference declarations
8103 *
8104 * [69] PEReference ::= '%' Name ';'
8105 *
8106 * [ WFC: No Recursion ]
8107 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008108 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008109 *
8110 * [ WFC: Entity Declared ]
8111 * In a document without any DTD, a document with only an internal DTD
8112 * subset which contains no parameter entity references, or a document
8113 * with "standalone='yes'", ... ... The declaration of a parameter
8114 * entity must precede any reference to it...
8115 *
8116 * [ VC: Entity Declared ]
8117 * In a document with an external subset or external parameter entities
8118 * with "standalone='no'", ... ... The declaration of a parameter entity
8119 * must precede any reference to it...
8120 *
8121 * [ WFC: In DTD ]
8122 * Parameter-entity references may only appear in the DTD.
8123 * NOTE: misleading but this is handled.
8124 *
8125 * Returns the string of the entity content.
8126 * str is updated to the current value of the index
8127 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008128static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008129xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8130 const xmlChar *ptr;
8131 xmlChar cur;
8132 xmlChar *name;
8133 xmlEntityPtr entity = NULL;
8134
8135 if ((str == NULL) || (*str == NULL)) return(NULL);
8136 ptr = *str;
8137 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008138 if (cur != '%')
8139 return(NULL);
8140 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008141 name = xmlParseStringName(ctxt, &ptr);
8142 if (name == NULL) {
8143 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8144 "xmlParseStringPEReference: no name\n");
8145 *str = ptr;
8146 return(NULL);
8147 }
8148 cur = *ptr;
8149 if (cur != ';') {
8150 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8151 xmlFree(name);
8152 *str = ptr;
8153 return(NULL);
8154 }
8155 ptr++;
8156
8157 /*
8158 * Increate the number of entity references parsed
8159 */
8160 ctxt->nbentities++;
8161
8162 /*
8163 * Request the entity from SAX
8164 */
8165 if ((ctxt->sax != NULL) &&
8166 (ctxt->sax->getParameterEntity != NULL))
8167 entity = ctxt->sax->getParameterEntity(ctxt->userData,
8168 name);
8169 if (entity == NULL) {
8170 /*
8171 * [ WFC: Entity Declared ]
8172 * In a document without any DTD, a document with only an
8173 * internal DTD subset which contains no parameter entity
8174 * references, or a document with "standalone='yes'", ...
8175 * ... The declaration of a parameter entity must precede
8176 * any reference to it...
8177 */
8178 if ((ctxt->standalone == 1) ||
8179 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8180 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8181 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008182 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008183 /*
8184 * [ VC: Entity Declared ]
8185 * In a document with an external subset or external
8186 * parameter entities with "standalone='no'", ...
8187 * ... The declaration of a parameter entity must
8188 * precede any reference to it...
8189 */
8190 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8191 "PEReference: %%%s; not found\n",
8192 name, NULL);
8193 ctxt->valid = 0;
8194 }
8195 } else {
8196 /*
8197 * Internal checking in case the entity quest barfed
8198 */
8199 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8200 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8201 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8202 "%%%s; is not a parameter entity\n",
8203 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008204 }
8205 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008206 ctxt->hasPErefs = 1;
8207 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008208 *str = ptr;
8209 return(entity);
8210}
8211
8212/**
8213 * xmlParseDocTypeDecl:
8214 * @ctxt: an XML parser context
8215 *
8216 * parse a DOCTYPE declaration
8217 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008218 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008219 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8220 *
8221 * [ VC: Root Element Type ]
8222 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008223 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008224 */
8225
8226void
8227xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008228 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008229 xmlChar *ExternalID = NULL;
8230 xmlChar *URI = NULL;
8231
8232 /*
8233 * We know that '<!DOCTYPE' has been detected.
8234 */
8235 SKIP(9);
8236
8237 SKIP_BLANKS;
8238
8239 /*
8240 * Parse the DOCTYPE name.
8241 */
8242 name = xmlParseName(ctxt);
8243 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008244 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8245 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008246 }
8247 ctxt->intSubName = name;
8248
8249 SKIP_BLANKS;
8250
8251 /*
8252 * Check for SystemID and ExternalID
8253 */
8254 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8255
8256 if ((URI != NULL) || (ExternalID != NULL)) {
8257 ctxt->hasExternalSubset = 1;
8258 }
8259 ctxt->extSubURI = URI;
8260 ctxt->extSubSystem = ExternalID;
8261
8262 SKIP_BLANKS;
8263
8264 /*
8265 * Create and update the internal subset.
8266 */
8267 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8268 (!ctxt->disableSAX))
8269 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8270
8271 /*
8272 * Is there any internal subset declarations ?
8273 * they are handled separately in xmlParseInternalSubset()
8274 */
8275 if (RAW == '[')
8276 return;
8277
8278 /*
8279 * We should be at the end of the DOCTYPE declaration.
8280 */
8281 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008282 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008283 }
8284 NEXT;
8285}
8286
8287/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008288 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008289 * @ctxt: an XML parser context
8290 *
8291 * parse the internal subset declaration
8292 *
8293 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8294 */
8295
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008296static void
Owen Taylor3473f882001-02-23 17:55:21 +00008297xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8298 /*
8299 * Is there any DTD definition ?
8300 */
8301 if (RAW == '[') {
8302 ctxt->instate = XML_PARSER_DTD;
8303 NEXT;
8304 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008305 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008306 * PEReferences.
8307 * Subsequence (markupdecl | PEReference | S)*
8308 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008309 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008310 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008311 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008312
8313 SKIP_BLANKS;
8314 xmlParseMarkupDecl(ctxt);
8315 xmlParsePEReference(ctxt);
8316
8317 /*
8318 * Pop-up of finished entities.
8319 */
8320 while ((RAW == 0) && (ctxt->inputNr > 1))
8321 xmlPopInput(ctxt);
8322
8323 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008324 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008325 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008326 break;
8327 }
8328 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008329 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008330 NEXT;
8331 SKIP_BLANKS;
8332 }
8333 }
8334
8335 /*
8336 * We should be at the end of the DOCTYPE declaration.
8337 */
8338 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008339 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008340 }
8341 NEXT;
8342}
8343
Daniel Veillard81273902003-09-30 00:43:48 +00008344#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008345/**
8346 * xmlParseAttribute:
8347 * @ctxt: an XML parser context
8348 * @value: a xmlChar ** used to store the value of the attribute
8349 *
8350 * parse an attribute
8351 *
8352 * [41] Attribute ::= Name Eq AttValue
8353 *
8354 * [ WFC: No External Entity References ]
8355 * Attribute values cannot contain direct or indirect entity references
8356 * to external entities.
8357 *
8358 * [ WFC: No < in Attribute Values ]
8359 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008360 * an attribute value (other than "&lt;") must not contain a <.
8361 *
Owen Taylor3473f882001-02-23 17:55:21 +00008362 * [ VC: Attribute Value Type ]
8363 * The attribute must have been declared; the value must be of the type
8364 * declared for it.
8365 *
8366 * [25] Eq ::= S? '=' S?
8367 *
8368 * With namespace:
8369 *
8370 * [NS 11] Attribute ::= QName Eq AttValue
8371 *
8372 * Also the case QName == xmlns:??? is handled independently as a namespace
8373 * definition.
8374 *
8375 * Returns the attribute name, and the value in *value.
8376 */
8377
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008378const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008379xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008380 const xmlChar *name;
8381 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008382
8383 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008384 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008385 name = xmlParseName(ctxt);
8386 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008387 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008388 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008389 return(NULL);
8390 }
8391
8392 /*
8393 * read the value
8394 */
8395 SKIP_BLANKS;
8396 if (RAW == '=') {
8397 NEXT;
8398 SKIP_BLANKS;
8399 val = xmlParseAttValue(ctxt);
8400 ctxt->instate = XML_PARSER_CONTENT;
8401 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008402 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008403 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008404 return(NULL);
8405 }
8406
8407 /*
8408 * Check that xml:lang conforms to the specification
8409 * No more registered as an error, just generate a warning now
8410 * since this was deprecated in XML second edition
8411 */
8412 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8413 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008414 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8415 "Malformed value for xml:lang : %s\n",
8416 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008417 }
8418 }
8419
8420 /*
8421 * Check that xml:space conforms to the specification
8422 */
8423 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8424 if (xmlStrEqual(val, BAD_CAST "default"))
8425 *(ctxt->space) = 0;
8426 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8427 *(ctxt->space) = 1;
8428 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008429 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008430"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008431 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008432 }
8433 }
8434
8435 *value = val;
8436 return(name);
8437}
8438
8439/**
8440 * xmlParseStartTag:
8441 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008442 *
Owen Taylor3473f882001-02-23 17:55:21 +00008443 * parse a start of tag either for rule element or
8444 * EmptyElement. In both case we don't parse the tag closing chars.
8445 *
8446 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8447 *
8448 * [ WFC: Unique Att Spec ]
8449 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008450 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008451 *
8452 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8453 *
8454 * [ WFC: Unique Att Spec ]
8455 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008456 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008457 *
8458 * With namespace:
8459 *
8460 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8461 *
8462 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8463 *
8464 * Returns the element name parsed
8465 */
8466
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008467const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008468xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008469 const xmlChar *name;
8470 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008471 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008472 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008473 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008474 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008475 int i;
8476
8477 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008478 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008479
8480 name = xmlParseName(ctxt);
8481 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008482 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008483 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008484 return(NULL);
8485 }
8486
8487 /*
8488 * Now parse the attributes, it ends up with the ending
8489 *
8490 * (S Attribute)* S?
8491 */
8492 SKIP_BLANKS;
8493 GROW;
8494
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008495 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008496 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008497 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008498 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008499 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008500
8501 attname = xmlParseAttribute(ctxt, &attvalue);
8502 if ((attname != NULL) && (attvalue != NULL)) {
8503 /*
8504 * [ WFC: Unique Att Spec ]
8505 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008506 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008507 */
8508 for (i = 0; i < nbatts;i += 2) {
8509 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008510 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008511 xmlFree(attvalue);
8512 goto failed;
8513 }
8514 }
Owen Taylor3473f882001-02-23 17:55:21 +00008515 /*
8516 * Add the pair to atts
8517 */
8518 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008519 maxatts = 22; /* allow for 10 attrs by default */
8520 atts = (const xmlChar **)
8521 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008522 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008523 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008524 if (attvalue != NULL)
8525 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008526 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008527 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008528 ctxt->atts = atts;
8529 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008530 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008531 const xmlChar **n;
8532
Owen Taylor3473f882001-02-23 17:55:21 +00008533 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008534 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008535 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008536 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008537 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008538 if (attvalue != NULL)
8539 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008540 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008541 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008542 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008543 ctxt->atts = atts;
8544 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008545 }
8546 atts[nbatts++] = attname;
8547 atts[nbatts++] = attvalue;
8548 atts[nbatts] = NULL;
8549 atts[nbatts + 1] = NULL;
8550 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008551 if (attvalue != NULL)
8552 xmlFree(attvalue);
8553 }
8554
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008555failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008556
Daniel Veillard3772de32002-12-17 10:31:45 +00008557 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008558 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8559 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008560 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008561 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8562 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008563 }
8564 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008565 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8566 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008567 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8568 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008569 break;
8570 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008571 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008572 GROW;
8573 }
8574
8575 /*
8576 * SAX: Start of Element !
8577 */
8578 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008579 (!ctxt->disableSAX)) {
8580 if (nbatts > 0)
8581 ctxt->sax->startElement(ctxt->userData, name, atts);
8582 else
8583 ctxt->sax->startElement(ctxt->userData, name, NULL);
8584 }
Owen Taylor3473f882001-02-23 17:55:21 +00008585
8586 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008587 /* Free only the content strings */
8588 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008589 if (atts[i] != NULL)
8590 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008591 }
8592 return(name);
8593}
8594
8595/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008596 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008597 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008598 * @line: line of the start tag
8599 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008600 *
8601 * parse an end of tag
8602 *
8603 * [42] ETag ::= '</' Name S? '>'
8604 *
8605 * With namespace
8606 *
8607 * [NS 9] ETag ::= '</' QName S? '>'
8608 */
8609
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008610static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008611xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008612 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008613
8614 GROW;
8615 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008616 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008617 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008618 return;
8619 }
8620 SKIP(2);
8621
Daniel Veillard46de64e2002-05-29 08:21:33 +00008622 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008623
8624 /*
8625 * We should definitely be at the ending "S? '>'" part
8626 */
8627 GROW;
8628 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008629 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008630 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008631 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008632 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008633
8634 /*
8635 * [ WFC: Element Type Match ]
8636 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008637 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008638 *
8639 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008640 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008641 if (name == NULL) name = BAD_CAST "unparseable";
8642 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008643 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008644 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008645 }
8646
8647 /*
8648 * SAX: End of Tag
8649 */
8650 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8651 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008652 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008653
Daniel Veillarde57ec792003-09-10 10:50:59 +00008654 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008655 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008656 return;
8657}
8658
8659/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008660 * xmlParseEndTag:
8661 * @ctxt: an XML parser context
8662 *
8663 * parse an end of tag
8664 *
8665 * [42] ETag ::= '</' Name S? '>'
8666 *
8667 * With namespace
8668 *
8669 * [NS 9] ETag ::= '</' QName S? '>'
8670 */
8671
8672void
8673xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674 xmlParseEndTag1(ctxt, 0);
8675}
Daniel Veillard81273902003-09-30 00:43:48 +00008676#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008677
8678/************************************************************************
8679 * *
8680 * SAX 2 specific operations *
8681 * *
8682 ************************************************************************/
8683
Daniel Veillard0fb18932003-09-07 09:14:37 +00008684/*
8685 * xmlGetNamespace:
8686 * @ctxt: an XML parser context
8687 * @prefix: the prefix to lookup
8688 *
8689 * Lookup the namespace name for the @prefix (which ca be NULL)
8690 * The prefix must come from the @ctxt->dict dictionnary
8691 *
8692 * Returns the namespace name or NULL if not bound
8693 */
8694static const xmlChar *
8695xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8696 int i;
8697
Daniel Veillarde57ec792003-09-10 10:50:59 +00008698 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008699 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008700 if (ctxt->nsTab[i] == prefix) {
8701 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8702 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008703 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008704 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008705 return(NULL);
8706}
8707
8708/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 * xmlParseQName:
8710 * @ctxt: an XML parser context
8711 * @prefix: pointer to store the prefix part
8712 *
8713 * parse an XML Namespace QName
8714 *
8715 * [6] QName ::= (Prefix ':')? LocalPart
8716 * [7] Prefix ::= NCName
8717 * [8] LocalPart ::= NCName
8718 *
8719 * Returns the Name parsed or NULL
8720 */
8721
8722static const xmlChar *
8723xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8724 const xmlChar *l, *p;
8725
8726 GROW;
8727
8728 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008729 if (l == NULL) {
8730 if (CUR == ':') {
8731 l = xmlParseName(ctxt);
8732 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008733 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008734 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008735 *prefix = NULL;
8736 return(l);
8737 }
8738 }
8739 return(NULL);
8740 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008741 if (CUR == ':') {
8742 NEXT;
8743 p = l;
8744 l = xmlParseNCName(ctxt);
8745 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008746 xmlChar *tmp;
8747
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008748 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8749 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008750 l = xmlParseNmtoken(ctxt);
8751 if (l == NULL)
8752 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8753 else {
8754 tmp = xmlBuildQName(l, p, NULL, 0);
8755 xmlFree((char *)l);
8756 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008757 p = xmlDictLookup(ctxt->dict, tmp, -1);
8758 if (tmp != NULL) xmlFree(tmp);
8759 *prefix = NULL;
8760 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008761 }
8762 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008763 xmlChar *tmp;
8764
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008765 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8766 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008767 NEXT;
8768 tmp = (xmlChar *) xmlParseName(ctxt);
8769 if (tmp != NULL) {
8770 tmp = xmlBuildQName(tmp, l, NULL, 0);
8771 l = xmlDictLookup(ctxt->dict, tmp, -1);
8772 if (tmp != NULL) xmlFree(tmp);
8773 *prefix = p;
8774 return(l);
8775 }
8776 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8777 l = xmlDictLookup(ctxt->dict, tmp, -1);
8778 if (tmp != NULL) xmlFree(tmp);
8779 *prefix = p;
8780 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008781 }
8782 *prefix = p;
8783 } else
8784 *prefix = NULL;
8785 return(l);
8786}
8787
8788/**
8789 * xmlParseQNameAndCompare:
8790 * @ctxt: an XML parser context
8791 * @name: the localname
8792 * @prefix: the prefix, if any.
8793 *
8794 * parse an XML name and compares for match
8795 * (specialized for endtag parsing)
8796 *
8797 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8798 * and the name for mismatch
8799 */
8800
8801static const xmlChar *
8802xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8803 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008804 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008805 const xmlChar *in;
8806 const xmlChar *ret;
8807 const xmlChar *prefix2;
8808
8809 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8810
8811 GROW;
8812 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008813
Daniel Veillard0fb18932003-09-07 09:14:37 +00008814 cmp = prefix;
8815 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008816 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008817 ++cmp;
8818 }
8819 if ((*cmp == 0) && (*in == ':')) {
8820 in++;
8821 cmp = name;
8822 while (*in != 0 && *in == *cmp) {
8823 ++in;
8824 ++cmp;
8825 }
William M. Brack76e95df2003-10-18 16:20:14 +00008826 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008827 /* success */
8828 ctxt->input->cur = in;
8829 return((const xmlChar*) 1);
8830 }
8831 }
8832 /*
8833 * all strings coms from the dictionary, equality can be done directly
8834 */
8835 ret = xmlParseQName (ctxt, &prefix2);
8836 if ((ret == name) && (prefix == prefix2))
8837 return((const xmlChar*) 1);
8838 return ret;
8839}
8840
8841/**
8842 * xmlParseAttValueInternal:
8843 * @ctxt: an XML parser context
8844 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008845 * @alloc: whether the attribute was reallocated as a new string
8846 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008847 *
8848 * parse a value for an attribute.
8849 * NOTE: if no normalization is needed, the routine will return pointers
8850 * directly from the data buffer.
8851 *
8852 * 3.3.3 Attribute-Value Normalization:
8853 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008854 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008855 * - a character reference is processed by appending the referenced
8856 * character to the attribute value
8857 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008858 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008859 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8860 * appending #x20 to the normalized value, except that only a single
8861 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008862 * parsed entity or the literal entity value of an internal parsed entity
8863 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008864 * If the declared value is not CDATA, then the XML processor must further
8865 * process the normalized attribute value by discarding any leading and
8866 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008867 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008868 * All attributes for which no declaration has been read should be treated
8869 * by a non-validating parser as if declared CDATA.
8870 *
8871 * Returns the AttValue parsed or NULL. The value has to be freed by the
8872 * caller if it was copied, this can be detected by val[*len] == 0.
8873 */
8874
8875static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008876xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8877 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008878{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008879 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008880 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008881 xmlChar *ret = NULL;
8882
8883 GROW;
8884 in = (xmlChar *) CUR_PTR;
8885 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008886 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008887 return (NULL);
8888 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008890
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008891 /*
8892 * try to handle in this routine the most common case where no
8893 * allocation of a new string is required and where content is
8894 * pure ASCII.
8895 */
8896 limit = *in++;
8897 end = ctxt->input->end;
8898 start = in;
8899 if (in >= end) {
8900 const xmlChar *oldbase = ctxt->input->base;
8901 GROW;
8902 if (oldbase != ctxt->input->base) {
8903 long delta = ctxt->input->base - oldbase;
8904 start = start + delta;
8905 in = in + delta;
8906 }
8907 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008908 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008909 if (normalize) {
8910 /*
8911 * Skip any leading spaces
8912 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008913 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008914 ((*in == 0x20) || (*in == 0x9) ||
8915 (*in == 0xA) || (*in == 0xD))) {
8916 in++;
8917 start = in;
8918 if (in >= end) {
8919 const xmlChar *oldbase = ctxt->input->base;
8920 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008921 if (ctxt->instate == XML_PARSER_EOF)
8922 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008923 if (oldbase != ctxt->input->base) {
8924 long delta = ctxt->input->base - oldbase;
8925 start = start + delta;
8926 in = in + delta;
8927 }
8928 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008929 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8930 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8931 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008932 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008933 return(NULL);
8934 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008935 }
8936 }
8937 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8938 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8939 if ((*in++ == 0x20) && (*in == 0x20)) break;
8940 if (in >= end) {
8941 const xmlChar *oldbase = ctxt->input->base;
8942 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008943 if (ctxt->instate == XML_PARSER_EOF)
8944 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008945 if (oldbase != ctxt->input->base) {
8946 long delta = ctxt->input->base - oldbase;
8947 start = start + delta;
8948 in = in + delta;
8949 }
8950 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008951 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8952 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8953 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008954 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008955 return(NULL);
8956 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008957 }
8958 }
8959 last = in;
8960 /*
8961 * skip the trailing blanks
8962 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008963 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008964 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008965 ((*in == 0x20) || (*in == 0x9) ||
8966 (*in == 0xA) || (*in == 0xD))) {
8967 in++;
8968 if (in >= end) {
8969 const xmlChar *oldbase = ctxt->input->base;
8970 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008971 if (ctxt->instate == XML_PARSER_EOF)
8972 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008973 if (oldbase != ctxt->input->base) {
8974 long delta = ctxt->input->base - oldbase;
8975 start = start + delta;
8976 in = in + delta;
8977 last = last + delta;
8978 }
8979 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008980 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8981 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8982 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008983 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008984 return(NULL);
8985 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008986 }
8987 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008988 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8989 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8990 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008991 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008992 return(NULL);
8993 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008994 if (*in != limit) goto need_complex;
8995 } else {
8996 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8997 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8998 in++;
8999 if (in >= end) {
9000 const xmlChar *oldbase = ctxt->input->base;
9001 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009002 if (ctxt->instate == XML_PARSER_EOF)
9003 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009004 if (oldbase != ctxt->input->base) {
9005 long delta = ctxt->input->base - oldbase;
9006 start = start + delta;
9007 in = in + delta;
9008 }
9009 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009010 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9011 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9012 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009013 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009014 return(NULL);
9015 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009016 }
9017 }
9018 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009019 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9020 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9021 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009022 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009023 return(NULL);
9024 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009025 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009026 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009027 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009028 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009029 *len = last - start;
9030 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009031 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009032 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009033 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009034 }
9035 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009036 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009037 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009038need_complex:
9039 if (alloc) *alloc = 1;
9040 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009041}
9042
9043/**
9044 * xmlParseAttribute2:
9045 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009046 * @pref: the element prefix
9047 * @elem: the element name
9048 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009049 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009050 * @len: an int * to save the length of the attribute
9051 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009052 *
9053 * parse an attribute in the new SAX2 framework.
9054 *
9055 * Returns the attribute name, and the value in *value, .
9056 */
9057
9058static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009059xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009060 const xmlChar * pref, const xmlChar * elem,
9061 const xmlChar ** prefix, xmlChar ** value,
9062 int *len, int *alloc)
9063{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009064 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009065 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009066 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009067
9068 *value = NULL;
9069 GROW;
9070 name = xmlParseQName(ctxt, prefix);
9071 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009072 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9073 "error parsing attribute name\n");
9074 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009075 }
9076
9077 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009078 * get the type if needed
9079 */
9080 if (ctxt->attsSpecial != NULL) {
9081 int type;
9082
9083 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009084 pref, elem, *prefix, name);
9085 if (type != 0)
9086 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009087 }
9088
9089 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009090 * read the value
9091 */
9092 SKIP_BLANKS;
9093 if (RAW == '=') {
9094 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009095 SKIP_BLANKS;
9096 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9097 if (normalize) {
9098 /*
9099 * Sometimes a second normalisation pass for spaces is needed
9100 * but that only happens if charrefs or entities refernces
9101 * have been used in the attribute value, i.e. the attribute
9102 * value have been extracted in an allocated string already.
9103 */
9104 if (*alloc) {
9105 const xmlChar *val2;
9106
9107 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009108 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009109 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009110 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009111 }
9112 }
9113 }
9114 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009115 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009116 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9117 "Specification mandate value for attribute %s\n",
9118 name);
9119 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009120 }
9121
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009122 if (*prefix == ctxt->str_xml) {
9123 /*
9124 * Check that xml:lang conforms to the specification
9125 * No more registered as an error, just generate a warning now
9126 * since this was deprecated in XML second edition
9127 */
9128 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9129 internal_val = xmlStrndup(val, *len);
9130 if (!xmlCheckLanguageID(internal_val)) {
9131 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9132 "Malformed value for xml:lang : %s\n",
9133 internal_val, NULL);
9134 }
9135 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009136
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009137 /*
9138 * Check that xml:space conforms to the specification
9139 */
9140 if (xmlStrEqual(name, BAD_CAST "space")) {
9141 internal_val = xmlStrndup(val, *len);
9142 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9143 *(ctxt->space) = 0;
9144 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9145 *(ctxt->space) = 1;
9146 else {
9147 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9148 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9149 internal_val, NULL);
9150 }
9151 }
9152 if (internal_val) {
9153 xmlFree(internal_val);
9154 }
9155 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009156
9157 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009158 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009159}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009160/**
9161 * xmlParseStartTag2:
9162 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009163 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009164 * parse a start of tag either for rule element or
9165 * EmptyElement. In both case we don't parse the tag closing chars.
9166 * This routine is called when running SAX2 parsing
9167 *
9168 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9169 *
9170 * [ WFC: Unique Att Spec ]
9171 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009172 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009173 *
9174 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9175 *
9176 * [ WFC: Unique Att Spec ]
9177 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009178 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009179 *
9180 * With namespace:
9181 *
9182 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9183 *
9184 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9185 *
9186 * Returns the element name parsed
9187 */
9188
9189static const xmlChar *
9190xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009191 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009192 const xmlChar *localname;
9193 const xmlChar *prefix;
9194 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009195 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009196 const xmlChar *nsname;
9197 xmlChar *attvalue;
9198 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009199 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009200 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009201 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009202 const xmlChar *base;
9203 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009204 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009205
9206 if (RAW != '<') return(NULL);
9207 NEXT1;
9208
9209 /*
9210 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9211 * point since the attribute values may be stored as pointers to
9212 * the buffer and calling SHRINK would destroy them !
9213 * The Shrinking is only possible once the full set of attribute
9214 * callbacks have been done.
9215 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009216reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009217 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009218 base = ctxt->input->base;
9219 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009220 oldline = ctxt->input->line;
9221 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009222 nbatts = 0;
9223 nratts = 0;
9224 nbdef = 0;
9225 nbNs = 0;
9226 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009227 /* Forget any namespaces added during an earlier parse of this element. */
9228 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009229
9230 localname = xmlParseQName(ctxt, &prefix);
9231 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009232 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9233 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009234 return(NULL);
9235 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009236 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009237
9238 /*
9239 * Now parse the attributes, it ends up with the ending
9240 *
9241 * (S Attribute)* S?
9242 */
9243 SKIP_BLANKS;
9244 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009245 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009246
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009247 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009248 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009249 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009250 const xmlChar *q = CUR_PTR;
9251 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009252 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009253
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009254 attname = xmlParseAttribute2(ctxt, prefix, localname,
9255 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009256 if (ctxt->input->base != base) {
9257 if ((attvalue != NULL) && (alloc != 0))
9258 xmlFree(attvalue);
9259 attvalue = NULL;
9260 goto base_changed;
9261 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009262 if ((attname != NULL) && (attvalue != NULL)) {
9263 if (len < 0) len = xmlStrlen(attvalue);
9264 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009265 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9266 xmlURIPtr uri;
9267
9268 if (*URL != 0) {
9269 uri = xmlParseURI((const char *) URL);
9270 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009271 xmlNsErr(ctxt, XML_WAR_NS_URI,
9272 "xmlns: '%s' is not a valid URI\n",
9273 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009274 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009275 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009276 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9277 "xmlns: URI %s is not absolute\n",
9278 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009279 }
9280 xmlFreeURI(uri);
9281 }
Daniel Veillard37334572008-07-31 08:20:02 +00009282 if (URL == ctxt->str_xml_ns) {
9283 if (attname != ctxt->str_xml) {
9284 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9285 "xml namespace URI cannot be the default namespace\n",
9286 NULL, NULL, NULL);
9287 }
9288 goto skip_default_ns;
9289 }
9290 if ((len == 29) &&
9291 (xmlStrEqual(URL,
9292 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9293 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9294 "reuse of the xmlns namespace name is forbidden\n",
9295 NULL, NULL, NULL);
9296 goto skip_default_ns;
9297 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009298 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009299 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009300 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009301 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009302 for (j = 1;j <= nbNs;j++)
9303 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9304 break;
9305 if (j <= nbNs)
9306 xmlErrAttributeDup(ctxt, NULL, attname);
9307 else
9308 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009309skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009310 if (alloc != 0) xmlFree(attvalue);
9311 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009312 continue;
9313 }
9314 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009315 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9316 xmlURIPtr uri;
9317
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009318 if (attname == ctxt->str_xml) {
9319 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009320 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9321 "xml namespace prefix mapped to wrong URI\n",
9322 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009323 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009324 /*
9325 * Do not keep a namespace definition node
9326 */
Daniel Veillard37334572008-07-31 08:20:02 +00009327 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009328 }
Daniel Veillard37334572008-07-31 08:20:02 +00009329 if (URL == ctxt->str_xml_ns) {
9330 if (attname != ctxt->str_xml) {
9331 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9332 "xml namespace URI mapped to wrong prefix\n",
9333 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009334 }
Daniel Veillard37334572008-07-31 08:20:02 +00009335 goto skip_ns;
9336 }
9337 if (attname == ctxt->str_xmlns) {
9338 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9339 "redefinition of the xmlns prefix is forbidden\n",
9340 NULL, NULL, NULL);
9341 goto skip_ns;
9342 }
9343 if ((len == 29) &&
9344 (xmlStrEqual(URL,
9345 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9346 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9347 "reuse of the xmlns namespace name is forbidden\n",
9348 NULL, NULL, NULL);
9349 goto skip_ns;
9350 }
9351 if ((URL == NULL) || (URL[0] == 0)) {
9352 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9353 "xmlns:%s: Empty XML namespace is not allowed\n",
9354 attname, NULL, NULL);
9355 goto skip_ns;
9356 } else {
9357 uri = xmlParseURI((const char *) URL);
9358 if (uri == NULL) {
9359 xmlNsErr(ctxt, XML_WAR_NS_URI,
9360 "xmlns:%s: '%s' is not a valid URI\n",
9361 attname, URL, NULL);
9362 } else {
9363 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9364 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9365 "xmlns:%s: URI %s is not absolute\n",
9366 attname, URL, NULL);
9367 }
9368 xmlFreeURI(uri);
9369 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009370 }
9371
Daniel Veillard0fb18932003-09-07 09:14:37 +00009372 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009373 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009374 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009375 for (j = 1;j <= nbNs;j++)
9376 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9377 break;
9378 if (j <= nbNs)
9379 xmlErrAttributeDup(ctxt, aprefix, attname);
9380 else
9381 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009382skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009383 if (alloc != 0) xmlFree(attvalue);
9384 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009385 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009386 continue;
9387 }
9388
9389 /*
9390 * Add the pair to atts
9391 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009392 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9393 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009394 if (attvalue[len] == 0)
9395 xmlFree(attvalue);
9396 goto failed;
9397 }
9398 maxatts = ctxt->maxatts;
9399 atts = ctxt->atts;
9400 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009401 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009402 atts[nbatts++] = attname;
9403 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009404 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009405 atts[nbatts++] = attvalue;
9406 attvalue += len;
9407 atts[nbatts++] = attvalue;
9408 /*
9409 * tag if some deallocation is needed
9410 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009411 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009412 } else {
9413 if ((attvalue != NULL) && (attvalue[len] == 0))
9414 xmlFree(attvalue);
9415 }
9416
Daniel Veillard37334572008-07-31 08:20:02 +00009417failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009418
9419 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009420 if (ctxt->instate == XML_PARSER_EOF)
9421 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009422 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009423 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9424 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009425 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9427 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009428 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009429 }
9430 SKIP_BLANKS;
9431 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9432 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009433 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009434 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009435 break;
9436 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009437 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009438 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009439 }
9440
Daniel Veillard0fb18932003-09-07 09:14:37 +00009441 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009442 * The attributes defaulting
9443 */
9444 if (ctxt->attsDefault != NULL) {
9445 xmlDefAttrsPtr defaults;
9446
9447 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9448 if (defaults != NULL) {
9449 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009450 attname = defaults->values[5 * i];
9451 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009452
9453 /*
9454 * special work for namespaces defaulted defs
9455 */
9456 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9457 /*
9458 * check that it's not a defined namespace
9459 */
9460 for (j = 1;j <= nbNs;j++)
9461 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9462 break;
9463 if (j <= nbNs) continue;
9464
9465 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009466 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009467 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009468 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009469 nbNs++;
9470 }
9471 } else if (aprefix == ctxt->str_xmlns) {
9472 /*
9473 * check that it's not a defined namespace
9474 */
9475 for (j = 1;j <= nbNs;j++)
9476 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9477 break;
9478 if (j <= nbNs) continue;
9479
9480 nsname = xmlGetNamespace(ctxt, attname);
9481 if (nsname != defaults->values[2]) {
9482 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009483 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009484 nbNs++;
9485 }
9486 } else {
9487 /*
9488 * check that it's not a defined attribute
9489 */
9490 for (j = 0;j < nbatts;j+=5) {
9491 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9492 break;
9493 }
9494 if (j < nbatts) continue;
9495
9496 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9497 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009498 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009499 }
9500 maxatts = ctxt->maxatts;
9501 atts = ctxt->atts;
9502 }
9503 atts[nbatts++] = attname;
9504 atts[nbatts++] = aprefix;
9505 if (aprefix == NULL)
9506 atts[nbatts++] = NULL;
9507 else
9508 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009509 atts[nbatts++] = defaults->values[5 * i + 2];
9510 atts[nbatts++] = defaults->values[5 * i + 3];
9511 if ((ctxt->standalone == 1) &&
9512 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009513 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009514 "standalone: attribute %s on %s defaulted from external subset\n",
9515 attname, localname);
9516 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009517 nbdef++;
9518 }
9519 }
9520 }
9521 }
9522
Daniel Veillarde70c8772003-11-25 07:21:18 +00009523 /*
9524 * The attributes checkings
9525 */
9526 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009527 /*
9528 * The default namespace does not apply to attribute names.
9529 */
9530 if (atts[i + 1] != NULL) {
9531 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9532 if (nsname == NULL) {
9533 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9534 "Namespace prefix %s for %s on %s is not defined\n",
9535 atts[i + 1], atts[i], localname);
9536 }
9537 atts[i + 2] = nsname;
9538 } else
9539 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009540 /*
9541 * [ WFC: Unique Att Spec ]
9542 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009543 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009544 * As extended by the Namespace in XML REC.
9545 */
9546 for (j = 0; j < i;j += 5) {
9547 if (atts[i] == atts[j]) {
9548 if (atts[i+1] == atts[j+1]) {
9549 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9550 break;
9551 }
9552 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9553 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9554 "Namespaced Attribute %s in '%s' redefined\n",
9555 atts[i], nsname, NULL);
9556 break;
9557 }
9558 }
9559 }
9560 }
9561
Daniel Veillarde57ec792003-09-10 10:50:59 +00009562 nsname = xmlGetNamespace(ctxt, prefix);
9563 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009564 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9565 "Namespace prefix %s on %s is not defined\n",
9566 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009567 }
9568 *pref = prefix;
9569 *URI = nsname;
9570
9571 /*
9572 * SAX: Start of Element !
9573 */
9574 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9575 (!ctxt->disableSAX)) {
9576 if (nbNs > 0)
9577 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9578 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9579 nbatts / 5, nbdef, atts);
9580 else
9581 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9582 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9583 }
9584
9585 /*
9586 * Free up attribute allocated strings if needed
9587 */
9588 if (attval != 0) {
9589 for (i = 3,j = 0; j < nratts;i += 5,j++)
9590 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9591 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009592 }
9593
9594 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009595
9596base_changed:
9597 /*
9598 * the attribute strings are valid iif the base didn't changed
9599 */
9600 if (attval != 0) {
9601 for (i = 3,j = 0; j < nratts;i += 5,j++)
9602 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9603 xmlFree((xmlChar *) atts[i]);
9604 }
9605 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009606 ctxt->input->line = oldline;
9607 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009608 if (ctxt->wellFormed == 1) {
9609 goto reparse;
9610 }
9611 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009612}
9613
9614/**
9615 * xmlParseEndTag2:
9616 * @ctxt: an XML parser context
9617 * @line: line of the start tag
9618 * @nsNr: number of namespaces on the start tag
9619 *
9620 * parse an end of tag
9621 *
9622 * [42] ETag ::= '</' Name S? '>'
9623 *
9624 * With namespace
9625 *
9626 * [NS 9] ETag ::= '</' QName S? '>'
9627 */
9628
9629static void
9630xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009631 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009632 const xmlChar *name;
9633
9634 GROW;
9635 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009636 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009637 return;
9638 }
9639 SKIP(2);
9640
William M. Brack13dfa872004-09-18 04:52:08 +00009641 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009642 if (ctxt->input->cur[tlen] == '>') {
9643 ctxt->input->cur += tlen + 1;
9644 goto done;
9645 }
9646 ctxt->input->cur += tlen;
9647 name = (xmlChar*)1;
9648 } else {
9649 if (prefix == NULL)
9650 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9651 else
9652 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9653 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009654
9655 /*
9656 * We should definitely be at the ending "S? '>'" part
9657 */
9658 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009659 if (ctxt->instate == XML_PARSER_EOF)
9660 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009661 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009662 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009663 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009664 } else
9665 NEXT1;
9666
9667 /*
9668 * [ WFC: Element Type Match ]
9669 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009670 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009671 *
9672 */
9673 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009674 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009675 if ((line == 0) && (ctxt->node != NULL))
9676 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009677 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009678 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009679 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009680 }
9681
9682 /*
9683 * SAX: End of Tag
9684 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009685done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009686 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9687 (!ctxt->disableSAX))
9688 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9689
Daniel Veillard0fb18932003-09-07 09:14:37 +00009690 spacePop(ctxt);
9691 if (nsNr != 0)
9692 nsPop(ctxt, nsNr);
9693 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009694}
9695
9696/**
Owen Taylor3473f882001-02-23 17:55:21 +00009697 * xmlParseCDSect:
9698 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009699 *
Owen Taylor3473f882001-02-23 17:55:21 +00009700 * Parse escaped pure raw content.
9701 *
9702 * [18] CDSect ::= CDStart CData CDEnd
9703 *
9704 * [19] CDStart ::= '<![CDATA['
9705 *
9706 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9707 *
9708 * [21] CDEnd ::= ']]>'
9709 */
9710void
9711xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9712 xmlChar *buf = NULL;
9713 int len = 0;
9714 int size = XML_PARSER_BUFFER_SIZE;
9715 int r, rl;
9716 int s, sl;
9717 int cur, l;
9718 int count = 0;
9719
Daniel Veillard8f597c32003-10-06 08:19:27 +00009720 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009721 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009722 SKIP(9);
9723 } else
9724 return;
9725
9726 ctxt->instate = XML_PARSER_CDATA_SECTION;
9727 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009728 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009729 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009730 ctxt->instate = XML_PARSER_CONTENT;
9731 return;
9732 }
9733 NEXTL(rl);
9734 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009735 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009736 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009737 ctxt->instate = XML_PARSER_CONTENT;
9738 return;
9739 }
9740 NEXTL(sl);
9741 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009742 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009743 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009744 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009745 return;
9746 }
William M. Brack871611b2003-10-18 04:53:14 +00009747 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009748 ((r != ']') || (s != ']') || (cur != '>'))) {
9749 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009750 xmlChar *tmp;
9751
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009752 if ((size > XML_MAX_TEXT_LENGTH) &&
9753 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9754 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9755 "CData section too big found", NULL);
9756 xmlFree (buf);
9757 return;
9758 }
9759 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009760 if (tmp == NULL) {
9761 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009762 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009763 return;
9764 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009765 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009766 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009767 }
9768 COPY_BUF(rl,buf,len,r);
9769 r = s;
9770 rl = sl;
9771 s = cur;
9772 sl = l;
9773 count++;
9774 if (count > 50) {
9775 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009776 if (ctxt->instate == XML_PARSER_EOF) {
9777 xmlFree(buf);
9778 return;
9779 }
Owen Taylor3473f882001-02-23 17:55:21 +00009780 count = 0;
9781 }
9782 NEXTL(l);
9783 cur = CUR_CHAR(l);
9784 }
9785 buf[len] = 0;
9786 ctxt->instate = XML_PARSER_CONTENT;
9787 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009788 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009789 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009790 xmlFree(buf);
9791 return;
9792 }
9793 NEXTL(l);
9794
9795 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009796 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009797 */
9798 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9799 if (ctxt->sax->cdataBlock != NULL)
9800 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009801 else if (ctxt->sax->characters != NULL)
9802 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009803 }
9804 xmlFree(buf);
9805}
9806
9807/**
9808 * xmlParseContent:
9809 * @ctxt: an XML parser context
9810 *
9811 * Parse a content:
9812 *
9813 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9814 */
9815
9816void
9817xmlParseContent(xmlParserCtxtPtr ctxt) {
9818 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009819 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009820 ((RAW != '<') || (NXT(1) != '/')) &&
9821 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009822 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009823 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009824 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009825
9826 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009827 * First case : a Processing Instruction.
9828 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009829 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009830 xmlParsePI(ctxt);
9831 }
9832
9833 /*
9834 * Second case : a CDSection
9835 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009836 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009837 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009838 xmlParseCDSect(ctxt);
9839 }
9840
9841 /*
9842 * Third case : a comment
9843 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009844 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009845 (NXT(2) == '-') && (NXT(3) == '-')) {
9846 xmlParseComment(ctxt);
9847 ctxt->instate = XML_PARSER_CONTENT;
9848 }
9849
9850 /*
9851 * Fourth case : a sub-element.
9852 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009853 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009854 xmlParseElement(ctxt);
9855 }
9856
9857 /*
9858 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009859 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009860 */
9861
Daniel Veillard21a0f912001-02-25 19:54:14 +00009862 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009863 xmlParseReference(ctxt);
9864 }
9865
9866 /*
9867 * Last case, text. Note that References are handled directly.
9868 */
9869 else {
9870 xmlParseCharData(ctxt, 0);
9871 }
9872
9873 GROW;
9874 /*
9875 * Pop-up of finished entities.
9876 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009877 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009878 xmlPopInput(ctxt);
9879 SHRINK;
9880
Daniel Veillardfdc91562002-07-01 21:52:03 +00009881 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009882 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9883 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009884 ctxt->instate = XML_PARSER_EOF;
9885 break;
9886 }
9887 }
9888}
9889
9890/**
9891 * xmlParseElement:
9892 * @ctxt: an XML parser context
9893 *
9894 * parse an XML element, this is highly recursive
9895 *
9896 * [39] element ::= EmptyElemTag | STag content ETag
9897 *
9898 * [ WFC: Element Type Match ]
9899 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009900 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009901 *
Owen Taylor3473f882001-02-23 17:55:21 +00009902 */
9903
9904void
9905xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009906 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009907 const xmlChar *prefix = NULL;
9908 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009909 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009910 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009911 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009912 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009913
Daniel Veillard8915c152008-08-26 13:05:34 +00009914 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9915 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9916 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9917 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9918 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009919 ctxt->instate = XML_PARSER_EOF;
9920 return;
9921 }
9922
Owen Taylor3473f882001-02-23 17:55:21 +00009923 /* Capture start position */
9924 if (ctxt->record_info) {
9925 node_info.begin_pos = ctxt->input->consumed +
9926 (CUR_PTR - ctxt->input->base);
9927 node_info.begin_line = ctxt->input->line;
9928 }
9929
9930 if (ctxt->spaceNr == 0)
9931 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009932 else if (*ctxt->space == -2)
9933 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009934 else
9935 spacePush(ctxt, *ctxt->space);
9936
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009937 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009938#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009939 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009940#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009941 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009942#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009943 else
9944 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009945#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009946 if (ctxt->instate == XML_PARSER_EOF)
9947 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009948 if (name == NULL) {
9949 spacePop(ctxt);
9950 return;
9951 }
9952 namePush(ctxt, name);
9953 ret = ctxt->node;
9954
Daniel Veillard4432df22003-09-28 18:58:27 +00009955#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009956 /*
9957 * [ VC: Root Element Type ]
9958 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009959 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009960 */
9961 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9962 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9963 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009964#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009965
9966 /*
9967 * Check for an Empty Element.
9968 */
9969 if ((RAW == '/') && (NXT(1) == '>')) {
9970 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009971 if (ctxt->sax2) {
9972 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9973 (!ctxt->disableSAX))
9974 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009975#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009976 } else {
9977 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9978 (!ctxt->disableSAX))
9979 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009980#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009981 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009982 namePop(ctxt);
9983 spacePop(ctxt);
9984 if (nsNr != ctxt->nsNr)
9985 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009986 if ( ret != NULL && ctxt->record_info ) {
9987 node_info.end_pos = ctxt->input->consumed +
9988 (CUR_PTR - ctxt->input->base);
9989 node_info.end_line = ctxt->input->line;
9990 node_info.node = ret;
9991 xmlParserAddNodeInfo(ctxt, &node_info);
9992 }
9993 return;
9994 }
9995 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009996 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009997 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009998 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9999 "Couldn't find end of Start Tag %s line %d\n",
10000 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010001
10002 /*
10003 * end of parsing of this node.
10004 */
10005 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010006 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010007 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010008 if (nsNr != ctxt->nsNr)
10009 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010010
10011 /*
10012 * Capture end position and add node
10013 */
10014 if ( ret != NULL && ctxt->record_info ) {
10015 node_info.end_pos = ctxt->input->consumed +
10016 (CUR_PTR - ctxt->input->base);
10017 node_info.end_line = ctxt->input->line;
10018 node_info.node = ret;
10019 xmlParserAddNodeInfo(ctxt, &node_info);
10020 }
10021 return;
10022 }
10023
10024 /*
10025 * Parse the content of the element:
10026 */
10027 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +000010028 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010029 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010030 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010031 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010032
10033 /*
10034 * end of parsing of this node.
10035 */
10036 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010037 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010038 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010039 if (nsNr != ctxt->nsNr)
10040 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010041 return;
10042 }
10043
10044 /*
10045 * parse the end of tag: '</' should be here.
10046 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010047 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010048 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010049 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010050 }
10051#ifdef LIBXML_SAX1_ENABLED
10052 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010053 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010054#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010055
10056 /*
10057 * Capture end position and add node
10058 */
10059 if ( ret != NULL && ctxt->record_info ) {
10060 node_info.end_pos = ctxt->input->consumed +
10061 (CUR_PTR - ctxt->input->base);
10062 node_info.end_line = ctxt->input->line;
10063 node_info.node = ret;
10064 xmlParserAddNodeInfo(ctxt, &node_info);
10065 }
10066}
10067
10068/**
10069 * xmlParseVersionNum:
10070 * @ctxt: an XML parser context
10071 *
10072 * parse the XML version value.
10073 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010074 * [26] VersionNum ::= '1.' [0-9]+
10075 *
10076 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010077 *
10078 * Returns the string giving the XML version number, or NULL
10079 */
10080xmlChar *
10081xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10082 xmlChar *buf = NULL;
10083 int len = 0;
10084 int size = 10;
10085 xmlChar cur;
10086
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010087 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010088 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010089 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010090 return(NULL);
10091 }
10092 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010093 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010094 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010095 return(NULL);
10096 }
10097 buf[len++] = cur;
10098 NEXT;
10099 cur=CUR;
10100 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010101 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010102 return(NULL);
10103 }
10104 buf[len++] = cur;
10105 NEXT;
10106 cur=CUR;
10107 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010108 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010109 xmlChar *tmp;
10110
Owen Taylor3473f882001-02-23 17:55:21 +000010111 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010112 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10113 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010114 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010115 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010116 return(NULL);
10117 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010118 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010119 }
10120 buf[len++] = cur;
10121 NEXT;
10122 cur=CUR;
10123 }
10124 buf[len] = 0;
10125 return(buf);
10126}
10127
10128/**
10129 * xmlParseVersionInfo:
10130 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010131 *
Owen Taylor3473f882001-02-23 17:55:21 +000010132 * parse the XML version.
10133 *
10134 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010135 *
Owen Taylor3473f882001-02-23 17:55:21 +000010136 * [25] Eq ::= S? '=' S?
10137 *
10138 * Returns the version string, e.g. "1.0"
10139 */
10140
10141xmlChar *
10142xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10143 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010144
Daniel Veillarda07050d2003-10-19 14:46:32 +000010145 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010146 SKIP(7);
10147 SKIP_BLANKS;
10148 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010149 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010150 return(NULL);
10151 }
10152 NEXT;
10153 SKIP_BLANKS;
10154 if (RAW == '"') {
10155 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010156 version = xmlParseVersionNum(ctxt);
10157 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010158 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010159 } else
10160 NEXT;
10161 } else if (RAW == '\''){
10162 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010163 version = xmlParseVersionNum(ctxt);
10164 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010165 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010166 } else
10167 NEXT;
10168 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010169 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010170 }
10171 }
10172 return(version);
10173}
10174
10175/**
10176 * xmlParseEncName:
10177 * @ctxt: an XML parser context
10178 *
10179 * parse the XML encoding name
10180 *
10181 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10182 *
10183 * Returns the encoding name value or NULL
10184 */
10185xmlChar *
10186xmlParseEncName(xmlParserCtxtPtr ctxt) {
10187 xmlChar *buf = NULL;
10188 int len = 0;
10189 int size = 10;
10190 xmlChar cur;
10191
10192 cur = CUR;
10193 if (((cur >= 'a') && (cur <= 'z')) ||
10194 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010195 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010196 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010197 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010198 return(NULL);
10199 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010200
Owen Taylor3473f882001-02-23 17:55:21 +000010201 buf[len++] = cur;
10202 NEXT;
10203 cur = CUR;
10204 while (((cur >= 'a') && (cur <= 'z')) ||
10205 ((cur >= 'A') && (cur <= 'Z')) ||
10206 ((cur >= '0') && (cur <= '9')) ||
10207 (cur == '.') || (cur == '_') ||
10208 (cur == '-')) {
10209 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010210 xmlChar *tmp;
10211
Owen Taylor3473f882001-02-23 17:55:21 +000010212 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010213 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10214 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010215 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010216 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010217 return(NULL);
10218 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010219 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010220 }
10221 buf[len++] = cur;
10222 NEXT;
10223 cur = CUR;
10224 if (cur == 0) {
10225 SHRINK;
10226 GROW;
10227 cur = CUR;
10228 }
10229 }
10230 buf[len] = 0;
10231 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010232 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010233 }
10234 return(buf);
10235}
10236
10237/**
10238 * xmlParseEncodingDecl:
10239 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010240 *
Owen Taylor3473f882001-02-23 17:55:21 +000010241 * parse the XML encoding declaration
10242 *
10243 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10244 *
10245 * this setups the conversion filters.
10246 *
10247 * Returns the encoding value or NULL
10248 */
10249
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010250const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010251xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10252 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010253
10254 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010255 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010256 SKIP(8);
10257 SKIP_BLANKS;
10258 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010259 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010260 return(NULL);
10261 }
10262 NEXT;
10263 SKIP_BLANKS;
10264 if (RAW == '"') {
10265 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010266 encoding = xmlParseEncName(ctxt);
10267 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010268 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010269 } else
10270 NEXT;
10271 } else if (RAW == '\''){
10272 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010273 encoding = xmlParseEncName(ctxt);
10274 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010275 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010276 } else
10277 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010278 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010279 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010280 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010281
10282 /*
10283 * Non standard parsing, allowing the user to ignore encoding
10284 */
10285 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10286 return(encoding);
10287
Daniel Veillard6b621b82003-08-11 15:03:34 +000010288 /*
10289 * UTF-16 encoding stwich has already taken place at this stage,
10290 * more over the little-endian/big-endian selection is already done
10291 */
10292 if ((encoding != NULL) &&
10293 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10294 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010295 /*
10296 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010297 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010298 * document is apparently UTF-8 compatible, then raise an
10299 * encoding mismatch fatal error
10300 */
10301 if ((ctxt->encoding == NULL) &&
10302 (ctxt->input->buf != NULL) &&
10303 (ctxt->input->buf->encoder == NULL)) {
10304 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10305 "Document labelled UTF-16 but has UTF-8 content\n");
10306 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010307 if (ctxt->encoding != NULL)
10308 xmlFree((xmlChar *) ctxt->encoding);
10309 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010310 }
10311 /*
10312 * UTF-8 encoding is handled natively
10313 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010314 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010315 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10316 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010317 if (ctxt->encoding != NULL)
10318 xmlFree((xmlChar *) ctxt->encoding);
10319 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010320 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010321 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010322 xmlCharEncodingHandlerPtr handler;
10323
10324 if (ctxt->input->encoding != NULL)
10325 xmlFree((xmlChar *) ctxt->input->encoding);
10326 ctxt->input->encoding = encoding;
10327
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010328 handler = xmlFindCharEncodingHandler((const char *) encoding);
10329 if (handler != NULL) {
10330 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010331 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010332 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010333 "Unsupported encoding %s\n", encoding);
10334 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010335 }
10336 }
10337 }
10338 return(encoding);
10339}
10340
10341/**
10342 * xmlParseSDDecl:
10343 * @ctxt: an XML parser context
10344 *
10345 * parse the XML standalone declaration
10346 *
10347 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010348 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010349 *
10350 * [ VC: Standalone Document Declaration ]
10351 * TODO The standalone document declaration must have the value "no"
10352 * if any external markup declarations contain declarations of:
10353 * - attributes with default values, if elements to which these
10354 * attributes apply appear in the document without specifications
10355 * of values for these attributes, or
10356 * - entities (other than amp, lt, gt, apos, quot), if references
10357 * to those entities appear in the document, or
10358 * - attributes with values subject to normalization, where the
10359 * attribute appears in the document with a value which will change
10360 * as a result of normalization, or
10361 * - element types with element content, if white space occurs directly
10362 * within any instance of those types.
10363 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010364 * Returns:
10365 * 1 if standalone="yes"
10366 * 0 if standalone="no"
10367 * -2 if standalone attribute is missing or invalid
10368 * (A standalone value of -2 means that the XML declaration was found,
10369 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010370 */
10371
10372int
10373xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010374 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010375
10376 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010377 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010378 SKIP(10);
10379 SKIP_BLANKS;
10380 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010381 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010382 return(standalone);
10383 }
10384 NEXT;
10385 SKIP_BLANKS;
10386 if (RAW == '\''){
10387 NEXT;
10388 if ((RAW == 'n') && (NXT(1) == 'o')) {
10389 standalone = 0;
10390 SKIP(2);
10391 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10392 (NXT(2) == 's')) {
10393 standalone = 1;
10394 SKIP(3);
10395 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010396 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010397 }
10398 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010399 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010400 } else
10401 NEXT;
10402 } else if (RAW == '"'){
10403 NEXT;
10404 if ((RAW == 'n') && (NXT(1) == 'o')) {
10405 standalone = 0;
10406 SKIP(2);
10407 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10408 (NXT(2) == 's')) {
10409 standalone = 1;
10410 SKIP(3);
10411 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010412 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010413 }
10414 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010415 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010416 } else
10417 NEXT;
10418 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010419 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010420 }
10421 }
10422 return(standalone);
10423}
10424
10425/**
10426 * xmlParseXMLDecl:
10427 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010428 *
Owen Taylor3473f882001-02-23 17:55:21 +000010429 * parse an XML declaration header
10430 *
10431 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10432 */
10433
10434void
10435xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10436 xmlChar *version;
10437
10438 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010439 * This value for standalone indicates that the document has an
10440 * XML declaration but it does not have a standalone attribute.
10441 * It will be overwritten later if a standalone attribute is found.
10442 */
10443 ctxt->input->standalone = -2;
10444
10445 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010446 * We know that '<?xml' is here.
10447 */
10448 SKIP(5);
10449
William M. Brack76e95df2003-10-18 16:20:14 +000010450 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010451 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10452 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010453 }
10454 SKIP_BLANKS;
10455
10456 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010457 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010458 */
10459 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010460 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010461 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010462 } else {
10463 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10464 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010465 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010466 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010467 if (ctxt->options & XML_PARSE_OLD10) {
10468 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10469 "Unsupported version '%s'\n",
10470 version);
10471 } else {
10472 if ((version[0] == '1') && ((version[1] == '.'))) {
10473 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10474 "Unsupported version '%s'\n",
10475 version, NULL);
10476 } else {
10477 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10478 "Unsupported version '%s'\n",
10479 version);
10480 }
10481 }
Daniel Veillard19840942001-11-29 16:11:38 +000010482 }
10483 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010484 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010485 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010486 }
Owen Taylor3473f882001-02-23 17:55:21 +000010487
10488 /*
10489 * We may have the encoding declaration
10490 */
William M. Brack76e95df2003-10-18 16:20:14 +000010491 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010492 if ((RAW == '?') && (NXT(1) == '>')) {
10493 SKIP(2);
10494 return;
10495 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010497 }
10498 xmlParseEncodingDecl(ctxt);
10499 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10500 /*
10501 * The XML REC instructs us to stop parsing right here
10502 */
10503 return;
10504 }
10505
10506 /*
10507 * We may have the standalone status.
10508 */
William M. Brack76e95df2003-10-18 16:20:14 +000010509 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010510 if ((RAW == '?') && (NXT(1) == '>')) {
10511 SKIP(2);
10512 return;
10513 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010514 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010515 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010516
10517 /*
10518 * We can grow the input buffer freely at that point
10519 */
10520 GROW;
10521
Owen Taylor3473f882001-02-23 17:55:21 +000010522 SKIP_BLANKS;
10523 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10524
10525 SKIP_BLANKS;
10526 if ((RAW == '?') && (NXT(1) == '>')) {
10527 SKIP(2);
10528 } else if (RAW == '>') {
10529 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010530 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010531 NEXT;
10532 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010533 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010534 MOVETO_ENDTAG(CUR_PTR);
10535 NEXT;
10536 }
10537}
10538
10539/**
10540 * xmlParseMisc:
10541 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010542 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010543 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010544 *
10545 * [27] Misc ::= Comment | PI | S
10546 */
10547
10548void
10549xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010550 while ((ctxt->instate != XML_PARSER_EOF) &&
10551 (((RAW == '<') && (NXT(1) == '?')) ||
10552 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10553 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010554 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010555 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010556 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010557 NEXT;
10558 } else
10559 xmlParseComment(ctxt);
10560 }
10561}
10562
10563/**
10564 * xmlParseDocument:
10565 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010566 *
Owen Taylor3473f882001-02-23 17:55:21 +000010567 * parse an XML document (and build a tree if using the standard SAX
10568 * interface).
10569 *
10570 * [1] document ::= prolog element Misc*
10571 *
10572 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10573 *
10574 * Returns 0, -1 in case of error. the parser context is augmented
10575 * as a result of the parsing.
10576 */
10577
10578int
10579xmlParseDocument(xmlParserCtxtPtr ctxt) {
10580 xmlChar start[4];
10581 xmlCharEncoding enc;
10582
10583 xmlInitParser();
10584
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010585 if ((ctxt == NULL) || (ctxt->input == NULL))
10586 return(-1);
10587
Owen Taylor3473f882001-02-23 17:55:21 +000010588 GROW;
10589
10590 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010591 * SAX: detecting the level.
10592 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010593 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010594
10595 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010596 * SAX: beginning of the document processing.
10597 */
10598 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10599 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10600
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010601 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010602 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010603 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010604 * Get the 4 first bytes and decode the charset
10605 * if enc != XML_CHAR_ENCODING_NONE
10606 * plug some encoding conversion routines.
10607 */
10608 start[0] = RAW;
10609 start[1] = NXT(1);
10610 start[2] = NXT(2);
10611 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010612 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010613 if (enc != XML_CHAR_ENCODING_NONE) {
10614 xmlSwitchEncoding(ctxt, enc);
10615 }
Owen Taylor3473f882001-02-23 17:55:21 +000010616 }
10617
10618
10619 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010620 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010621 }
10622
10623 /*
10624 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010625 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010626 * than just the first line, unless the amount of data is really
10627 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010628 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010629 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10630 GROW;
10631 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010632 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010633
10634 /*
10635 * Note that we will switch encoding on the fly.
10636 */
10637 xmlParseXMLDecl(ctxt);
10638 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10639 /*
10640 * The XML REC instructs us to stop parsing right here
10641 */
10642 return(-1);
10643 }
10644 ctxt->standalone = ctxt->input->standalone;
10645 SKIP_BLANKS;
10646 } else {
10647 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10648 }
10649 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10650 ctxt->sax->startDocument(ctxt->userData);
10651
10652 /*
10653 * The Misc part of the Prolog
10654 */
10655 GROW;
10656 xmlParseMisc(ctxt);
10657
10658 /*
10659 * Then possibly doc type declaration(s) and more Misc
10660 * (doctypedecl Misc*)?
10661 */
10662 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010663 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010664
10665 ctxt->inSubset = 1;
10666 xmlParseDocTypeDecl(ctxt);
10667 if (RAW == '[') {
10668 ctxt->instate = XML_PARSER_DTD;
10669 xmlParseInternalSubset(ctxt);
10670 }
10671
10672 /*
10673 * Create and update the external subset.
10674 */
10675 ctxt->inSubset = 2;
10676 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10677 (!ctxt->disableSAX))
10678 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10679 ctxt->extSubSystem, ctxt->extSubURI);
10680 ctxt->inSubset = 0;
10681
Daniel Veillardac4118d2008-01-11 05:27:32 +000010682 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010683
10684 ctxt->instate = XML_PARSER_PROLOG;
10685 xmlParseMisc(ctxt);
10686 }
10687
10688 /*
10689 * Time to start parsing the tree itself
10690 */
10691 GROW;
10692 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010693 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10694 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010695 } else {
10696 ctxt->instate = XML_PARSER_CONTENT;
10697 xmlParseElement(ctxt);
10698 ctxt->instate = XML_PARSER_EPILOG;
10699
10700
10701 /*
10702 * The Misc part at the end
10703 */
10704 xmlParseMisc(ctxt);
10705
Daniel Veillard561b7f82002-03-20 21:55:57 +000010706 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010707 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010708 }
10709 ctxt->instate = XML_PARSER_EOF;
10710 }
10711
10712 /*
10713 * SAX: end of the document processing.
10714 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010715 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010716 ctxt->sax->endDocument(ctxt->userData);
10717
Daniel Veillard5997aca2002-03-18 18:36:20 +000010718 /*
10719 * Remove locally kept entity definitions if the tree was not built
10720 */
10721 if ((ctxt->myDoc != NULL) &&
10722 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10723 xmlFreeDoc(ctxt->myDoc);
10724 ctxt->myDoc = NULL;
10725 }
10726
Daniel Veillardae0765b2008-07-31 19:54:59 +000010727 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10728 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10729 if (ctxt->valid)
10730 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10731 if (ctxt->nsWellFormed)
10732 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10733 if (ctxt->options & XML_PARSE_OLD10)
10734 ctxt->myDoc->properties |= XML_DOC_OLD10;
10735 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010736 if (! ctxt->wellFormed) {
10737 ctxt->valid = 0;
10738 return(-1);
10739 }
Owen Taylor3473f882001-02-23 17:55:21 +000010740 return(0);
10741}
10742
10743/**
10744 * xmlParseExtParsedEnt:
10745 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010746 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010747 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010748 * An external general parsed entity is well-formed if it matches the
10749 * production labeled extParsedEnt.
10750 *
10751 * [78] extParsedEnt ::= TextDecl? content
10752 *
10753 * Returns 0, -1 in case of error. the parser context is augmented
10754 * as a result of the parsing.
10755 */
10756
10757int
10758xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10759 xmlChar start[4];
10760 xmlCharEncoding enc;
10761
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010762 if ((ctxt == NULL) || (ctxt->input == NULL))
10763 return(-1);
10764
Owen Taylor3473f882001-02-23 17:55:21 +000010765 xmlDefaultSAXHandlerInit();
10766
Daniel Veillard309f81d2003-09-23 09:02:53 +000010767 xmlDetectSAX2(ctxt);
10768
Owen Taylor3473f882001-02-23 17:55:21 +000010769 GROW;
10770
10771 /*
10772 * SAX: beginning of the document processing.
10773 */
10774 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10775 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10776
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010777 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010778 * Get the 4 first bytes and decode the charset
10779 * if enc != XML_CHAR_ENCODING_NONE
10780 * plug some encoding conversion routines.
10781 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010782 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10783 start[0] = RAW;
10784 start[1] = NXT(1);
10785 start[2] = NXT(2);
10786 start[3] = NXT(3);
10787 enc = xmlDetectCharEncoding(start, 4);
10788 if (enc != XML_CHAR_ENCODING_NONE) {
10789 xmlSwitchEncoding(ctxt, enc);
10790 }
Owen Taylor3473f882001-02-23 17:55:21 +000010791 }
10792
10793
10794 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010795 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010796 }
10797
10798 /*
10799 * Check for the XMLDecl in the Prolog.
10800 */
10801 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010802 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010803
10804 /*
10805 * Note that we will switch encoding on the fly.
10806 */
10807 xmlParseXMLDecl(ctxt);
10808 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10809 /*
10810 * The XML REC instructs us to stop parsing right here
10811 */
10812 return(-1);
10813 }
10814 SKIP_BLANKS;
10815 } else {
10816 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10817 }
10818 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10819 ctxt->sax->startDocument(ctxt->userData);
10820
10821 /*
10822 * Doing validity checking on chunk doesn't make sense
10823 */
10824 ctxt->instate = XML_PARSER_CONTENT;
10825 ctxt->validate = 0;
10826 ctxt->loadsubset = 0;
10827 ctxt->depth = 0;
10828
10829 xmlParseContent(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010830
Owen Taylor3473f882001-02-23 17:55:21 +000010831 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010832 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010833 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010834 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010835 }
10836
10837 /*
10838 * SAX: end of the document processing.
10839 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010840 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010841 ctxt->sax->endDocument(ctxt->userData);
10842
10843 if (! ctxt->wellFormed) return(-1);
10844 return(0);
10845}
10846
Daniel Veillard73b013f2003-09-30 12:36:01 +000010847#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010848/************************************************************************
10849 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010850 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010851 * *
10852 ************************************************************************/
10853
10854/**
10855 * xmlParseLookupSequence:
10856 * @ctxt: an XML parser context
10857 * @first: the first char to lookup
10858 * @next: the next char to lookup or zero
10859 * @third: the next char to lookup or zero
10860 *
10861 * Try to find if a sequence (first, next, third) or just (first next) or
10862 * (first) is available in the input stream.
10863 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10864 * to avoid rescanning sequences of bytes, it DOES change the state of the
10865 * parser, do not use liberally.
10866 *
10867 * Returns the index to the current parsing point if the full sequence
10868 * is available, -1 otherwise.
10869 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010870static int
Owen Taylor3473f882001-02-23 17:55:21 +000010871xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10872 xmlChar next, xmlChar third) {
10873 int base, len;
10874 xmlParserInputPtr in;
10875 const xmlChar *buf;
10876
10877 in = ctxt->input;
10878 if (in == NULL) return(-1);
10879 base = in->cur - in->base;
10880 if (base < 0) return(-1);
10881 if (ctxt->checkIndex > base)
10882 base = ctxt->checkIndex;
10883 if (in->buf == NULL) {
10884 buf = in->base;
10885 len = in->length;
10886 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010887 buf = xmlBufContent(in->buf->buffer);
10888 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010889 }
10890 /* take into account the sequence length */
10891 if (third) len -= 2;
10892 else if (next) len --;
10893 for (;base < len;base++) {
10894 if (buf[base] == first) {
10895 if (third != 0) {
10896 if ((buf[base + 1] != next) ||
10897 (buf[base + 2] != third)) continue;
10898 } else if (next != 0) {
10899 if (buf[base + 1] != next) continue;
10900 }
10901 ctxt->checkIndex = 0;
10902#ifdef DEBUG_PUSH
10903 if (next == 0)
10904 xmlGenericError(xmlGenericErrorContext,
10905 "PP: lookup '%c' found at %d\n",
10906 first, base);
10907 else if (third == 0)
10908 xmlGenericError(xmlGenericErrorContext,
10909 "PP: lookup '%c%c' found at %d\n",
10910 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010911 else
Owen Taylor3473f882001-02-23 17:55:21 +000010912 xmlGenericError(xmlGenericErrorContext,
10913 "PP: lookup '%c%c%c' found at %d\n",
10914 first, next, third, base);
10915#endif
10916 return(base - (in->cur - in->base));
10917 }
10918 }
10919 ctxt->checkIndex = base;
10920#ifdef DEBUG_PUSH
10921 if (next == 0)
10922 xmlGenericError(xmlGenericErrorContext,
10923 "PP: lookup '%c' failed\n", first);
10924 else if (third == 0)
10925 xmlGenericError(xmlGenericErrorContext,
10926 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010927 else
Owen Taylor3473f882001-02-23 17:55:21 +000010928 xmlGenericError(xmlGenericErrorContext,
10929 "PP: lookup '%c%c%c' failed\n", first, next, third);
10930#endif
10931 return(-1);
10932}
10933
10934/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010935 * xmlParseGetLasts:
10936 * @ctxt: an XML parser context
10937 * @lastlt: pointer to store the last '<' from the input
10938 * @lastgt: pointer to store the last '>' from the input
10939 *
10940 * Lookup the last < and > in the current chunk
10941 */
10942static void
10943xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10944 const xmlChar **lastgt) {
10945 const xmlChar *tmp;
10946
10947 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10948 xmlGenericError(xmlGenericErrorContext,
10949 "Internal error: xmlParseGetLasts\n");
10950 return;
10951 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010952 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010953 tmp = ctxt->input->end;
10954 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010955 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010956 if (tmp < ctxt->input->base) {
10957 *lastlt = NULL;
10958 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010959 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010960 *lastlt = tmp;
10961 tmp++;
10962 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10963 if (*tmp == '\'') {
10964 tmp++;
10965 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10966 if (tmp < ctxt->input->end) tmp++;
10967 } else if (*tmp == '"') {
10968 tmp++;
10969 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10970 if (tmp < ctxt->input->end) tmp++;
10971 } else
10972 tmp++;
10973 }
10974 if (tmp < ctxt->input->end)
10975 *lastgt = tmp;
10976 else {
10977 tmp = *lastlt;
10978 tmp--;
10979 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10980 if (tmp >= ctxt->input->base)
10981 *lastgt = tmp;
10982 else
10983 *lastgt = NULL;
10984 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010985 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010986 } else {
10987 *lastlt = NULL;
10988 *lastgt = NULL;
10989 }
10990}
10991/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010992 * xmlCheckCdataPush:
10993 * @cur: pointer to the bock of characters
10994 * @len: length of the block in bytes
10995 *
10996 * Check that the block of characters is okay as SCdata content [20]
10997 *
10998 * Returns the number of bytes to pass if okay, a negative index where an
10999 * UTF-8 error occured otherwise
11000 */
11001static int
11002xmlCheckCdataPush(const xmlChar *utf, int len) {
11003 int ix;
11004 unsigned char c;
11005 int codepoint;
11006
11007 if ((utf == NULL) || (len <= 0))
11008 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011009
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011010 for (ix = 0; ix < len;) { /* string is 0-terminated */
11011 c = utf[ix];
11012 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11013 if (c >= 0x20)
11014 ix++;
11015 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11016 ix++;
11017 else
11018 return(-ix);
11019 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11020 if (ix + 2 > len) return(ix);
11021 if ((utf[ix+1] & 0xc0 ) != 0x80)
11022 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011023 codepoint = (utf[ix] & 0x1f) << 6;
11024 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011025 if (!xmlIsCharQ(codepoint))
11026 return(-ix);
11027 ix += 2;
11028 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11029 if (ix + 3 > len) return(ix);
11030 if (((utf[ix+1] & 0xc0) != 0x80) ||
11031 ((utf[ix+2] & 0xc0) != 0x80))
11032 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011033 codepoint = (utf[ix] & 0xf) << 12;
11034 codepoint |= (utf[ix+1] & 0x3f) << 6;
11035 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011036 if (!xmlIsCharQ(codepoint))
11037 return(-ix);
11038 ix += 3;
11039 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11040 if (ix + 4 > len) return(ix);
11041 if (((utf[ix+1] & 0xc0) != 0x80) ||
11042 ((utf[ix+2] & 0xc0) != 0x80) ||
11043 ((utf[ix+3] & 0xc0) != 0x80))
11044 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011045 codepoint = (utf[ix] & 0x7) << 18;
11046 codepoint |= (utf[ix+1] & 0x3f) << 12;
11047 codepoint |= (utf[ix+2] & 0x3f) << 6;
11048 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011049 if (!xmlIsCharQ(codepoint))
11050 return(-ix);
11051 ix += 4;
11052 } else /* unknown encoding */
11053 return(-ix);
11054 }
11055 return(ix);
11056}
11057
11058/**
Owen Taylor3473f882001-02-23 17:55:21 +000011059 * xmlParseTryOrFinish:
11060 * @ctxt: an XML parser context
11061 * @terminate: last chunk indicator
11062 *
11063 * Try to progress on parsing
11064 *
11065 * Returns zero if no parsing was possible
11066 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011067static int
Owen Taylor3473f882001-02-23 17:55:21 +000011068xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11069 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011070 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011071 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011072 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011073
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011074 if (ctxt->input == NULL)
11075 return(0);
11076
Owen Taylor3473f882001-02-23 17:55:21 +000011077#ifdef DEBUG_PUSH
11078 switch (ctxt->instate) {
11079 case XML_PARSER_EOF:
11080 xmlGenericError(xmlGenericErrorContext,
11081 "PP: try EOF\n"); break;
11082 case XML_PARSER_START:
11083 xmlGenericError(xmlGenericErrorContext,
11084 "PP: try START\n"); break;
11085 case XML_PARSER_MISC:
11086 xmlGenericError(xmlGenericErrorContext,
11087 "PP: try MISC\n");break;
11088 case XML_PARSER_COMMENT:
11089 xmlGenericError(xmlGenericErrorContext,
11090 "PP: try COMMENT\n");break;
11091 case XML_PARSER_PROLOG:
11092 xmlGenericError(xmlGenericErrorContext,
11093 "PP: try PROLOG\n");break;
11094 case XML_PARSER_START_TAG:
11095 xmlGenericError(xmlGenericErrorContext,
11096 "PP: try START_TAG\n");break;
11097 case XML_PARSER_CONTENT:
11098 xmlGenericError(xmlGenericErrorContext,
11099 "PP: try CONTENT\n");break;
11100 case XML_PARSER_CDATA_SECTION:
11101 xmlGenericError(xmlGenericErrorContext,
11102 "PP: try CDATA_SECTION\n");break;
11103 case XML_PARSER_END_TAG:
11104 xmlGenericError(xmlGenericErrorContext,
11105 "PP: try END_TAG\n");break;
11106 case XML_PARSER_ENTITY_DECL:
11107 xmlGenericError(xmlGenericErrorContext,
11108 "PP: try ENTITY_DECL\n");break;
11109 case XML_PARSER_ENTITY_VALUE:
11110 xmlGenericError(xmlGenericErrorContext,
11111 "PP: try ENTITY_VALUE\n");break;
11112 case XML_PARSER_ATTRIBUTE_VALUE:
11113 xmlGenericError(xmlGenericErrorContext,
11114 "PP: try ATTRIBUTE_VALUE\n");break;
11115 case XML_PARSER_DTD:
11116 xmlGenericError(xmlGenericErrorContext,
11117 "PP: try DTD\n");break;
11118 case XML_PARSER_EPILOG:
11119 xmlGenericError(xmlGenericErrorContext,
11120 "PP: try EPILOG\n");break;
11121 case XML_PARSER_PI:
11122 xmlGenericError(xmlGenericErrorContext,
11123 "PP: try PI\n");break;
11124 case XML_PARSER_IGNORE:
11125 xmlGenericError(xmlGenericErrorContext,
11126 "PP: try IGNORE\n");break;
11127 }
11128#endif
11129
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011130 if ((ctxt->input != NULL) &&
11131 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011132 xmlSHRINK(ctxt);
11133 ctxt->checkIndex = 0;
11134 }
11135 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011136
Daniel Veillarda880b122003-04-21 21:36:41 +000011137 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000011138 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011139 return(0);
11140
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011141
Owen Taylor3473f882001-02-23 17:55:21 +000011142 /*
11143 * Pop-up of finished entities.
11144 */
11145 while ((RAW == 0) && (ctxt->inputNr > 1))
11146 xmlPopInput(ctxt);
11147
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011148 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011149 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011150 avail = ctxt->input->length -
11151 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011152 else {
11153 /*
11154 * If we are operating on converted input, try to flush
11155 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011156 * buffer. But do not do this in document start where
11157 * encoding="..." may not have been read and we work on a
11158 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011159 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011160 if ((ctxt->instate != XML_PARSER_START) &&
11161 (ctxt->input->buf->raw != NULL) &&
11162 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011163 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11164 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011165 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011166
11167 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011168 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11169 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011170 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011171 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011172 (ctxt->input->cur - ctxt->input->base);
11173 }
Owen Taylor3473f882001-02-23 17:55:21 +000011174 if (avail < 1)
11175 goto done;
11176 switch (ctxt->instate) {
11177 case XML_PARSER_EOF:
11178 /*
11179 * Document parsing is done !
11180 */
11181 goto done;
11182 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011183 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11184 xmlChar start[4];
11185 xmlCharEncoding enc;
11186
11187 /*
11188 * Very first chars read from the document flow.
11189 */
11190 if (avail < 4)
11191 goto done;
11192
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011193 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011194 * Get the 4 first bytes and decode the charset
11195 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011196 * plug some encoding conversion routines,
11197 * else xmlSwitchEncoding will set to (default)
11198 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011199 */
11200 start[0] = RAW;
11201 start[1] = NXT(1);
11202 start[2] = NXT(2);
11203 start[3] = NXT(3);
11204 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011205 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011206 break;
11207 }
Owen Taylor3473f882001-02-23 17:55:21 +000011208
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011209 if (avail < 2)
11210 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011211 cur = ctxt->input->cur[0];
11212 next = ctxt->input->cur[1];
11213 if (cur == 0) {
11214 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11215 ctxt->sax->setDocumentLocator(ctxt->userData,
11216 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011217 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011218 ctxt->instate = XML_PARSER_EOF;
11219#ifdef DEBUG_PUSH
11220 xmlGenericError(xmlGenericErrorContext,
11221 "PP: entering EOF\n");
11222#endif
11223 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11224 ctxt->sax->endDocument(ctxt->userData);
11225 goto done;
11226 }
11227 if ((cur == '<') && (next == '?')) {
11228 /* PI or XML decl */
11229 if (avail < 5) return(ret);
11230 if ((!terminate) &&
11231 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11232 return(ret);
11233 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11234 ctxt->sax->setDocumentLocator(ctxt->userData,
11235 &xmlDefaultSAXLocator);
11236 if ((ctxt->input->cur[2] == 'x') &&
11237 (ctxt->input->cur[3] == 'm') &&
11238 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011239 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011240 ret += 5;
11241#ifdef DEBUG_PUSH
11242 xmlGenericError(xmlGenericErrorContext,
11243 "PP: Parsing XML Decl\n");
11244#endif
11245 xmlParseXMLDecl(ctxt);
11246 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11247 /*
11248 * The XML REC instructs us to stop parsing right
11249 * here
11250 */
11251 ctxt->instate = XML_PARSER_EOF;
11252 return(0);
11253 }
11254 ctxt->standalone = ctxt->input->standalone;
11255 if ((ctxt->encoding == NULL) &&
11256 (ctxt->input->encoding != NULL))
11257 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11258 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11259 (!ctxt->disableSAX))
11260 ctxt->sax->startDocument(ctxt->userData);
11261 ctxt->instate = XML_PARSER_MISC;
11262#ifdef DEBUG_PUSH
11263 xmlGenericError(xmlGenericErrorContext,
11264 "PP: entering MISC\n");
11265#endif
11266 } else {
11267 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11268 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11269 (!ctxt->disableSAX))
11270 ctxt->sax->startDocument(ctxt->userData);
11271 ctxt->instate = XML_PARSER_MISC;
11272#ifdef DEBUG_PUSH
11273 xmlGenericError(xmlGenericErrorContext,
11274 "PP: entering MISC\n");
11275#endif
11276 }
11277 } else {
11278 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11279 ctxt->sax->setDocumentLocator(ctxt->userData,
11280 &xmlDefaultSAXLocator);
11281 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011282 if (ctxt->version == NULL) {
11283 xmlErrMemory(ctxt, NULL);
11284 break;
11285 }
Owen Taylor3473f882001-02-23 17:55:21 +000011286 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11287 (!ctxt->disableSAX))
11288 ctxt->sax->startDocument(ctxt->userData);
11289 ctxt->instate = XML_PARSER_MISC;
11290#ifdef DEBUG_PUSH
11291 xmlGenericError(xmlGenericErrorContext,
11292 "PP: entering MISC\n");
11293#endif
11294 }
11295 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011296 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011297 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011298 const xmlChar *prefix = NULL;
11299 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011300 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011301
11302 if ((avail < 2) && (ctxt->inputNr == 1))
11303 goto done;
11304 cur = ctxt->input->cur[0];
11305 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011306 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011307 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011308 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11309 ctxt->sax->endDocument(ctxt->userData);
11310 goto done;
11311 }
11312 if (!terminate) {
11313 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011314 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011315 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011316 goto done;
11317 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11318 goto done;
11319 }
11320 }
11321 if (ctxt->spaceNr == 0)
11322 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011323 else if (*ctxt->space == -2)
11324 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011325 else
11326 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011327#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011328 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011329#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011330 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011331#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011332 else
11333 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011334#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011335 if (ctxt->instate == XML_PARSER_EOF)
11336 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011337 if (name == NULL) {
11338 spacePop(ctxt);
11339 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011340 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11341 ctxt->sax->endDocument(ctxt->userData);
11342 goto done;
11343 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011344#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011345 /*
11346 * [ VC: Root Element Type ]
11347 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011348 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011349 */
11350 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11351 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11352 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011353#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011354
11355 /*
11356 * Check for an Empty Element.
11357 */
11358 if ((RAW == '/') && (NXT(1) == '>')) {
11359 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011360
11361 if (ctxt->sax2) {
11362 if ((ctxt->sax != NULL) &&
11363 (ctxt->sax->endElementNs != NULL) &&
11364 (!ctxt->disableSAX))
11365 ctxt->sax->endElementNs(ctxt->userData, name,
11366 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011367 if (ctxt->nsNr - nsNr > 0)
11368 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011369#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011370 } else {
11371 if ((ctxt->sax != NULL) &&
11372 (ctxt->sax->endElement != NULL) &&
11373 (!ctxt->disableSAX))
11374 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011375#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011376 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011377 spacePop(ctxt);
11378 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011379 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011380 } else {
11381 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011382 }
Daniel Veillard65686452012-07-19 18:25:01 +080011383 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011384 break;
11385 }
11386 if (RAW == '>') {
11387 NEXT;
11388 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011389 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011390 "Couldn't find end of Start Tag %s\n",
11391 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011392 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011393 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011394 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011395 if (ctxt->sax2)
11396 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011397#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011398 else
11399 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011400#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011401
Daniel Veillarda880b122003-04-21 21:36:41 +000011402 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011403 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011404 break;
11405 }
11406 case XML_PARSER_CONTENT: {
11407 const xmlChar *test;
11408 unsigned int cons;
11409 if ((avail < 2) && (ctxt->inputNr == 1))
11410 goto done;
11411 cur = ctxt->input->cur[0];
11412 next = ctxt->input->cur[1];
11413
11414 test = CUR_PTR;
11415 cons = ctxt->input->consumed;
11416 if ((cur == '<') && (next == '/')) {
11417 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011418 break;
11419 } else if ((cur == '<') && (next == '?')) {
11420 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011421 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11422 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011423 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011424 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011425 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011426 ctxt->instate = XML_PARSER_CONTENT;
11427 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011428 } else if ((cur == '<') && (next != '!')) {
11429 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011430 break;
11431 } else if ((cur == '<') && (next == '!') &&
11432 (ctxt->input->cur[2] == '-') &&
11433 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011434 int term;
11435
11436 if (avail < 4)
11437 goto done;
11438 ctxt->input->cur += 4;
11439 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11440 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011441 if ((!terminate) && (term < 0)) {
11442 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011443 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011444 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011445 xmlParseComment(ctxt);
11446 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011447 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011448 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11449 (ctxt->input->cur[2] == '[') &&
11450 (ctxt->input->cur[3] == 'C') &&
11451 (ctxt->input->cur[4] == 'D') &&
11452 (ctxt->input->cur[5] == 'A') &&
11453 (ctxt->input->cur[6] == 'T') &&
11454 (ctxt->input->cur[7] == 'A') &&
11455 (ctxt->input->cur[8] == '[')) {
11456 SKIP(9);
11457 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011458 break;
11459 } else if ((cur == '<') && (next == '!') &&
11460 (avail < 9)) {
11461 goto done;
11462 } else if (cur == '&') {
11463 if ((!terminate) &&
11464 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11465 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011466 xmlParseReference(ctxt);
11467 } else {
11468 /* TODO Avoid the extra copy, handle directly !!! */
11469 /*
11470 * Goal of the following test is:
11471 * - minimize calls to the SAX 'character' callback
11472 * when they are mergeable
11473 * - handle an problem for isBlank when we only parse
11474 * a sequence of blank chars and the next one is
11475 * not available to check against '<' presence.
11476 * - tries to homogenize the differences in SAX
11477 * callbacks between the push and pull versions
11478 * of the parser.
11479 */
11480 if ((ctxt->inputNr == 1) &&
11481 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11482 if (!terminate) {
11483 if (ctxt->progressive) {
11484 if ((lastlt == NULL) ||
11485 (ctxt->input->cur > lastlt))
11486 goto done;
11487 } else if (xmlParseLookupSequence(ctxt,
11488 '<', 0, 0) < 0) {
11489 goto done;
11490 }
11491 }
11492 }
11493 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011494 xmlParseCharData(ctxt, 0);
11495 }
11496 /*
11497 * Pop-up of finished entities.
11498 */
11499 while ((RAW == 0) && (ctxt->inputNr > 1))
11500 xmlPopInput(ctxt);
11501 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011502 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11503 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011504 ctxt->instate = XML_PARSER_EOF;
11505 break;
11506 }
11507 break;
11508 }
11509 case XML_PARSER_END_TAG:
11510 if (avail < 2)
11511 goto done;
11512 if (!terminate) {
11513 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011514 /* > can be found unescaped in attribute values */
11515 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011516 goto done;
11517 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11518 goto done;
11519 }
11520 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011521 if (ctxt->sax2) {
11522 xmlParseEndTag2(ctxt,
11523 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11524 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011525 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011526 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011527 }
11528#ifdef LIBXML_SAX1_ENABLED
11529 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011530 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011531#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011532 if (ctxt->instate == XML_PARSER_EOF) {
11533 /* Nothing */
11534 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011535 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011536 } else {
11537 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011538 }
11539 break;
11540 case XML_PARSER_CDATA_SECTION: {
11541 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011542 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011543 * cdataBlock merge back contiguous callbacks.
11544 */
11545 int base;
11546
11547 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11548 if (base < 0) {
11549 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011550 int tmp;
11551
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011552 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011553 XML_PARSER_BIG_BUFFER_SIZE);
11554 if (tmp < 0) {
11555 tmp = -tmp;
11556 ctxt->input->cur += tmp;
11557 goto encoding_error;
11558 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011559 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11560 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011561 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011562 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011563 else if (ctxt->sax->characters != NULL)
11564 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011565 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011566 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011567 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011568 ctxt->checkIndex = 0;
11569 }
11570 goto done;
11571 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011572 int tmp;
11573
11574 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11575 if ((tmp < 0) || (tmp != base)) {
11576 tmp = -tmp;
11577 ctxt->input->cur += tmp;
11578 goto encoding_error;
11579 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011580 if ((ctxt->sax != NULL) && (base == 0) &&
11581 (ctxt->sax->cdataBlock != NULL) &&
11582 (!ctxt->disableSAX)) {
11583 /*
11584 * Special case to provide identical behaviour
11585 * between pull and push parsers on enpty CDATA
11586 * sections
11587 */
11588 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11589 (!strncmp((const char *)&ctxt->input->cur[-9],
11590 "<![CDATA[", 9)))
11591 ctxt->sax->cdataBlock(ctxt->userData,
11592 BAD_CAST "", 0);
11593 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011594 (!ctxt->disableSAX)) {
11595 if (ctxt->sax->cdataBlock != NULL)
11596 ctxt->sax->cdataBlock(ctxt->userData,
11597 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011598 else if (ctxt->sax->characters != NULL)
11599 ctxt->sax->characters(ctxt->userData,
11600 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011601 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011602 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011603 ctxt->checkIndex = 0;
11604 ctxt->instate = XML_PARSER_CONTENT;
11605#ifdef DEBUG_PUSH
11606 xmlGenericError(xmlGenericErrorContext,
11607 "PP: entering CONTENT\n");
11608#endif
11609 }
11610 break;
11611 }
Owen Taylor3473f882001-02-23 17:55:21 +000011612 case XML_PARSER_MISC:
11613 SKIP_BLANKS;
11614 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011615 avail = ctxt->input->length -
11616 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011617 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011618 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011619 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011620 if (avail < 2)
11621 goto done;
11622 cur = ctxt->input->cur[0];
11623 next = ctxt->input->cur[1];
11624 if ((cur == '<') && (next == '?')) {
11625 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011626 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11627 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011628 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011629 }
Owen Taylor3473f882001-02-23 17:55:21 +000011630#ifdef DEBUG_PUSH
11631 xmlGenericError(xmlGenericErrorContext,
11632 "PP: Parsing PI\n");
11633#endif
11634 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011635 ctxt->instate = XML_PARSER_MISC;
11636 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011637 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011638 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011639 (ctxt->input->cur[2] == '-') &&
11640 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011641 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011642 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11643 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011644 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011645 }
Owen Taylor3473f882001-02-23 17:55:21 +000011646#ifdef DEBUG_PUSH
11647 xmlGenericError(xmlGenericErrorContext,
11648 "PP: Parsing Comment\n");
11649#endif
11650 xmlParseComment(ctxt);
11651 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011652 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011653 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011654 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011655 (ctxt->input->cur[2] == 'D') &&
11656 (ctxt->input->cur[3] == 'O') &&
11657 (ctxt->input->cur[4] == 'C') &&
11658 (ctxt->input->cur[5] == 'T') &&
11659 (ctxt->input->cur[6] == 'Y') &&
11660 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011661 (ctxt->input->cur[8] == 'E')) {
11662 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011663 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11664 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011665 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011666 }
Owen Taylor3473f882001-02-23 17:55:21 +000011667#ifdef DEBUG_PUSH
11668 xmlGenericError(xmlGenericErrorContext,
11669 "PP: Parsing internal subset\n");
11670#endif
11671 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011672 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011673 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011674 xmlParseDocTypeDecl(ctxt);
11675 if (RAW == '[') {
11676 ctxt->instate = XML_PARSER_DTD;
11677#ifdef DEBUG_PUSH
11678 xmlGenericError(xmlGenericErrorContext,
11679 "PP: entering DTD\n");
11680#endif
11681 } else {
11682 /*
11683 * Create and update the external subset.
11684 */
11685 ctxt->inSubset = 2;
11686 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11687 (ctxt->sax->externalSubset != NULL))
11688 ctxt->sax->externalSubset(ctxt->userData,
11689 ctxt->intSubName, ctxt->extSubSystem,
11690 ctxt->extSubURI);
11691 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011692 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011693 ctxt->instate = XML_PARSER_PROLOG;
11694#ifdef DEBUG_PUSH
11695 xmlGenericError(xmlGenericErrorContext,
11696 "PP: entering PROLOG\n");
11697#endif
11698 }
11699 } else if ((cur == '<') && (next == '!') &&
11700 (avail < 9)) {
11701 goto done;
11702 } else {
11703 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011704 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011705 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011706#ifdef DEBUG_PUSH
11707 xmlGenericError(xmlGenericErrorContext,
11708 "PP: entering START_TAG\n");
11709#endif
11710 }
11711 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011712 case XML_PARSER_PROLOG:
11713 SKIP_BLANKS;
11714 if (ctxt->input->buf == NULL)
11715 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11716 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011717 avail = xmlBufUse(ctxt->input->buf->buffer) -
11718 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011719 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011720 goto done;
11721 cur = ctxt->input->cur[0];
11722 next = ctxt->input->cur[1];
11723 if ((cur == '<') && (next == '?')) {
11724 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011725 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11726 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011727 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011728 }
Owen Taylor3473f882001-02-23 17:55:21 +000011729#ifdef DEBUG_PUSH
11730 xmlGenericError(xmlGenericErrorContext,
11731 "PP: Parsing PI\n");
11732#endif
11733 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011734 ctxt->instate = XML_PARSER_PROLOG;
11735 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011736 } else if ((cur == '<') && (next == '!') &&
11737 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11738 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011739 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11740 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011741 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011742 }
Owen Taylor3473f882001-02-23 17:55:21 +000011743#ifdef DEBUG_PUSH
11744 xmlGenericError(xmlGenericErrorContext,
11745 "PP: Parsing Comment\n");
11746#endif
11747 xmlParseComment(ctxt);
11748 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011749 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011750 } else if ((cur == '<') && (next == '!') &&
11751 (avail < 4)) {
11752 goto done;
11753 } else {
11754 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011755 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011756 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011757 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011758#ifdef DEBUG_PUSH
11759 xmlGenericError(xmlGenericErrorContext,
11760 "PP: entering START_TAG\n");
11761#endif
11762 }
11763 break;
11764 case XML_PARSER_EPILOG:
11765 SKIP_BLANKS;
11766 if (ctxt->input->buf == NULL)
11767 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11768 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011769 avail = xmlBufUse(ctxt->input->buf->buffer) -
11770 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011771 if (avail < 2)
11772 goto done;
11773 cur = ctxt->input->cur[0];
11774 next = ctxt->input->cur[1];
11775 if ((cur == '<') && (next == '?')) {
11776 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011777 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11778 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011779 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011780 }
Owen Taylor3473f882001-02-23 17:55:21 +000011781#ifdef DEBUG_PUSH
11782 xmlGenericError(xmlGenericErrorContext,
11783 "PP: Parsing PI\n");
11784#endif
11785 xmlParsePI(ctxt);
11786 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011787 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011788 } else if ((cur == '<') && (next == '!') &&
11789 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11790 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011791 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11792 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011793 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011794 }
Owen Taylor3473f882001-02-23 17:55:21 +000011795#ifdef DEBUG_PUSH
11796 xmlGenericError(xmlGenericErrorContext,
11797 "PP: Parsing Comment\n");
11798#endif
11799 xmlParseComment(ctxt);
11800 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011801 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011802 } else if ((cur == '<') && (next == '!') &&
11803 (avail < 4)) {
11804 goto done;
11805 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011806 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011807 ctxt->instate = XML_PARSER_EOF;
11808#ifdef DEBUG_PUSH
11809 xmlGenericError(xmlGenericErrorContext,
11810 "PP: entering EOF\n");
11811#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011812 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011813 ctxt->sax->endDocument(ctxt->userData);
11814 goto done;
11815 }
11816 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011817 case XML_PARSER_DTD: {
11818 /*
11819 * Sorry but progressive parsing of the internal subset
11820 * is not expected to be supported. We first check that
11821 * the full content of the internal subset is available and
11822 * the parsing is launched only at that point.
11823 * Internal subset ends up with "']' S? '>'" in an unescaped
11824 * section and not in a ']]>' sequence which are conditional
11825 * sections (whoever argued to keep that crap in XML deserve
11826 * a place in hell !).
11827 */
11828 int base, i;
11829 xmlChar *buf;
11830 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011831 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011832
11833 base = ctxt->input->cur - ctxt->input->base;
11834 if (base < 0) return(0);
11835 if (ctxt->checkIndex > base)
11836 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011837 buf = xmlBufContent(ctxt->input->buf->buffer);
11838 use = xmlBufUse(ctxt->input->buf->buffer);
11839 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011840 if (quote != 0) {
11841 if (buf[base] == quote)
11842 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011843 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011844 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011845 if ((quote == 0) && (buf[base] == '<')) {
11846 int found = 0;
11847 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011848 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011849 (buf[base + 1] == '!') &&
11850 (buf[base + 2] == '-') &&
11851 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011852 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011853 if ((buf[base] == '-') &&
11854 (buf[base + 1] == '-') &&
11855 (buf[base + 2] == '>')) {
11856 found = 1;
11857 base += 2;
11858 break;
11859 }
11860 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011861 if (!found) {
11862#if 0
11863 fprintf(stderr, "unfinished comment\n");
11864#endif
11865 break; /* for */
11866 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011867 continue;
11868 }
11869 }
Owen Taylor3473f882001-02-23 17:55:21 +000011870 if (buf[base] == '"') {
11871 quote = '"';
11872 continue;
11873 }
11874 if (buf[base] == '\'') {
11875 quote = '\'';
11876 continue;
11877 }
11878 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011879#if 0
11880 fprintf(stderr, "%c%c%c%c: ", buf[base],
11881 buf[base + 1], buf[base + 2], buf[base + 3]);
11882#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011883 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011884 break;
11885 if (buf[base + 1] == ']') {
11886 /* conditional crap, skip both ']' ! */
11887 base++;
11888 continue;
11889 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011890 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011891 if (buf[base + i] == '>') {
11892#if 0
11893 fprintf(stderr, "found\n");
11894#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011895 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011896 }
11897 if (!IS_BLANK_CH(buf[base + i])) {
11898#if 0
11899 fprintf(stderr, "not found\n");
11900#endif
11901 goto not_end_of_int_subset;
11902 }
Owen Taylor3473f882001-02-23 17:55:21 +000011903 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011904#if 0
11905 fprintf(stderr, "end of stream\n");
11906#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011907 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011908
Owen Taylor3473f882001-02-23 17:55:21 +000011909 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011910not_end_of_int_subset:
11911 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011912 }
11913 /*
11914 * We didn't found the end of the Internal subset
11915 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011916 if (quote == 0)
11917 ctxt->checkIndex = base;
11918 else
11919 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011920#ifdef DEBUG_PUSH
11921 if (next == 0)
11922 xmlGenericError(xmlGenericErrorContext,
11923 "PP: lookup of int subset end filed\n");
11924#endif
11925 goto done;
11926
11927found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011928 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011929 xmlParseInternalSubset(ctxt);
11930 ctxt->inSubset = 2;
11931 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11932 (ctxt->sax->externalSubset != NULL))
11933 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11934 ctxt->extSubSystem, ctxt->extSubURI);
11935 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011936 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011937 ctxt->instate = XML_PARSER_PROLOG;
11938 ctxt->checkIndex = 0;
11939#ifdef DEBUG_PUSH
11940 xmlGenericError(xmlGenericErrorContext,
11941 "PP: entering PROLOG\n");
11942#endif
11943 break;
11944 }
11945 case XML_PARSER_COMMENT:
11946 xmlGenericError(xmlGenericErrorContext,
11947 "PP: internal error, state == COMMENT\n");
11948 ctxt->instate = XML_PARSER_CONTENT;
11949#ifdef DEBUG_PUSH
11950 xmlGenericError(xmlGenericErrorContext,
11951 "PP: entering CONTENT\n");
11952#endif
11953 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011954 case XML_PARSER_IGNORE:
11955 xmlGenericError(xmlGenericErrorContext,
11956 "PP: internal error, state == IGNORE");
11957 ctxt->instate = XML_PARSER_DTD;
11958#ifdef DEBUG_PUSH
11959 xmlGenericError(xmlGenericErrorContext,
11960 "PP: entering DTD\n");
11961#endif
11962 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011963 case XML_PARSER_PI:
11964 xmlGenericError(xmlGenericErrorContext,
11965 "PP: internal error, state == PI\n");
11966 ctxt->instate = XML_PARSER_CONTENT;
11967#ifdef DEBUG_PUSH
11968 xmlGenericError(xmlGenericErrorContext,
11969 "PP: entering CONTENT\n");
11970#endif
11971 break;
11972 case XML_PARSER_ENTITY_DECL:
11973 xmlGenericError(xmlGenericErrorContext,
11974 "PP: internal error, state == ENTITY_DECL\n");
11975 ctxt->instate = XML_PARSER_DTD;
11976#ifdef DEBUG_PUSH
11977 xmlGenericError(xmlGenericErrorContext,
11978 "PP: entering DTD\n");
11979#endif
11980 break;
11981 case XML_PARSER_ENTITY_VALUE:
11982 xmlGenericError(xmlGenericErrorContext,
11983 "PP: internal error, state == ENTITY_VALUE\n");
11984 ctxt->instate = XML_PARSER_CONTENT;
11985#ifdef DEBUG_PUSH
11986 xmlGenericError(xmlGenericErrorContext,
11987 "PP: entering DTD\n");
11988#endif
11989 break;
11990 case XML_PARSER_ATTRIBUTE_VALUE:
11991 xmlGenericError(xmlGenericErrorContext,
11992 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11993 ctxt->instate = XML_PARSER_START_TAG;
11994#ifdef DEBUG_PUSH
11995 xmlGenericError(xmlGenericErrorContext,
11996 "PP: entering START_TAG\n");
11997#endif
11998 break;
11999 case XML_PARSER_SYSTEM_LITERAL:
12000 xmlGenericError(xmlGenericErrorContext,
12001 "PP: internal error, state == SYSTEM_LITERAL\n");
12002 ctxt->instate = XML_PARSER_START_TAG;
12003#ifdef DEBUG_PUSH
12004 xmlGenericError(xmlGenericErrorContext,
12005 "PP: entering START_TAG\n");
12006#endif
12007 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012008 case XML_PARSER_PUBLIC_LITERAL:
12009 xmlGenericError(xmlGenericErrorContext,
12010 "PP: internal error, state == PUBLIC_LITERAL\n");
12011 ctxt->instate = XML_PARSER_START_TAG;
12012#ifdef DEBUG_PUSH
12013 xmlGenericError(xmlGenericErrorContext,
12014 "PP: entering START_TAG\n");
12015#endif
12016 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012017 }
12018 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012019done:
Owen Taylor3473f882001-02-23 17:55:21 +000012020#ifdef DEBUG_PUSH
12021 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12022#endif
12023 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012024encoding_error:
12025 {
12026 char buffer[150];
12027
12028 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12029 ctxt->input->cur[0], ctxt->input->cur[1],
12030 ctxt->input->cur[2], ctxt->input->cur[3]);
12031 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12032 "Input is not proper UTF-8, indicate encoding !\n%s",
12033 BAD_CAST buffer, NULL);
12034 }
12035 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012036}
12037
12038/**
Daniel Veillard65686452012-07-19 18:25:01 +080012039 * xmlParseCheckTransition:
12040 * @ctxt: an XML parser context
12041 * @chunk: a char array
12042 * @size: the size in byte of the chunk
12043 *
12044 * Check depending on the current parser state if the chunk given must be
12045 * processed immediately or one need more data to advance on parsing.
12046 *
12047 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12048 */
12049static int
12050xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12051 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12052 return(-1);
12053 if (ctxt->instate == XML_PARSER_START_TAG) {
12054 if (memchr(chunk, '>', size) != NULL)
12055 return(1);
12056 return(0);
12057 }
12058 if (ctxt->progressive == XML_PARSER_COMMENT) {
12059 if (memchr(chunk, '>', size) != NULL)
12060 return(1);
12061 return(0);
12062 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012063 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12064 if (memchr(chunk, '>', size) != NULL)
12065 return(1);
12066 return(0);
12067 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012068 if (ctxt->progressive == XML_PARSER_PI) {
12069 if (memchr(chunk, '>', size) != NULL)
12070 return(1);
12071 return(0);
12072 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012073 if (ctxt->instate == XML_PARSER_END_TAG) {
12074 if (memchr(chunk, '>', size) != NULL)
12075 return(1);
12076 return(0);
12077 }
12078 if ((ctxt->progressive == XML_PARSER_DTD) ||
12079 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012080 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012081 return(1);
12082 return(0);
12083 }
Daniel Veillard65686452012-07-19 18:25:01 +080012084 return(1);
12085}
12086
12087/**
Owen Taylor3473f882001-02-23 17:55:21 +000012088 * xmlParseChunk:
12089 * @ctxt: an XML parser context
12090 * @chunk: an char array
12091 * @size: the size in byte of the chunk
12092 * @terminate: last chunk indicator
12093 *
12094 * Parse a Chunk of memory
12095 *
12096 * Returns zero if no error, the xmlParserErrors otherwise.
12097 */
12098int
12099xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12100 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012101 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012102 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012103 size_t old_avail = 0;
12104 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012105
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012106 if (ctxt == NULL)
12107 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012108 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012109 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012110 if (ctxt->instate == XML_PARSER_EOF)
12111 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012112 if (ctxt->instate == XML_PARSER_START)
12113 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012114 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12115 (chunk[size - 1] == '\r')) {
12116 end_in_lf = 1;
12117 size--;
12118 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012119
12120xmldecl_done:
12121
Owen Taylor3473f882001-02-23 17:55:21 +000012122 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12123 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012124 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12125 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012126 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012127
Daniel Veillard65686452012-07-19 18:25:01 +080012128 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012129 /*
12130 * Specific handling if we autodetected an encoding, we should not
12131 * push more than the first line ... which depend on the encoding
12132 * And only push the rest once the final encoding was detected
12133 */
12134 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12135 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012136 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012137
12138 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12139 BAD_CAST "UTF-16")) ||
12140 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12141 BAD_CAST "UTF16")))
12142 len = 90;
12143 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12144 BAD_CAST "UCS-4")) ||
12145 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12146 BAD_CAST "UCS4")))
12147 len = 180;
12148
12149 if (ctxt->input->buf->rawconsumed < len)
12150 len -= ctxt->input->buf->rawconsumed;
12151
Raul Hudeaba9716a2010-03-15 10:13:29 +010012152 /*
12153 * Change size for reading the initial declaration only
12154 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12155 * will blindly copy extra bytes from memory.
12156 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012157 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012158 remain = size - len;
12159 size = len;
12160 } else {
12161 remain = 0;
12162 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012163 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012164 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012165 if (res < 0) {
12166 ctxt->errNo = XML_PARSER_EOF;
12167 ctxt->disableSAX = 1;
12168 return (XML_PARSER_EOF);
12169 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012170 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012171#ifdef DEBUG_PUSH
12172 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12173#endif
12174
Owen Taylor3473f882001-02-23 17:55:21 +000012175 } else if (ctxt->instate != XML_PARSER_EOF) {
12176 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12177 xmlParserInputBufferPtr in = ctxt->input->buf;
12178 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12179 (in->raw != NULL)) {
12180 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012181 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12182 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012183
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012184 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012185 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012186 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012187 xmlGenericError(xmlGenericErrorContext,
12188 "xmlParseChunk: encoder error\n");
12189 return(XML_ERR_INVALID_ENCODING);
12190 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012191 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012192 }
12193 }
12194 }
Daniel Veillard65686452012-07-19 18:25:01 +080012195 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012196 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012197 } else {
12198 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12199 avail = xmlBufUse(ctxt->input->buf->buffer);
12200 /*
12201 * Depending on the current state it may not be such
12202 * a good idea to try parsing if there is nothing in the chunk
12203 * which would be worth doing a parser state transition and we
12204 * need to wait for more data
12205 */
12206 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12207 (old_avail == 0) || (avail == 0) ||
12208 (xmlParseCheckTransition(ctxt,
12209 (const char *)&ctxt->input->base[old_avail],
12210 avail - old_avail)))
12211 xmlParseTryOrFinish(ctxt, terminate);
12212 }
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012213 if ((ctxt->input != NULL) &&
12214 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12215 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12216 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12217 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12218 ctxt->instate = XML_PARSER_EOF;
12219 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012220 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12221 return(ctxt->errNo);
12222
12223 if (remain != 0) {
12224 chunk += size;
12225 size = remain;
12226 remain = 0;
12227 goto xmldecl_done;
12228 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012229 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12230 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012231 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12232 ctxt->input);
12233 size_t current = ctxt->input->cur - ctxt->input->base;
12234
Daniel Veillarda617e242006-01-09 14:38:44 +000012235 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012236
12237 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12238 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012239 }
Owen Taylor3473f882001-02-23 17:55:21 +000012240 if (terminate) {
12241 /*
12242 * Check for termination
12243 */
Daniel Veillard65686452012-07-19 18:25:01 +080012244 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012245
12246 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012247 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012248 cur_avail = ctxt->input->length -
12249 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012250 else
Daniel Veillard65686452012-07-19 18:25:01 +080012251 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12252 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012253 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012254
Owen Taylor3473f882001-02-23 17:55:21 +000012255 if ((ctxt->instate != XML_PARSER_EOF) &&
12256 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012257 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012258 }
Daniel Veillard65686452012-07-19 18:25:01 +080012259 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012260 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012261 }
Owen Taylor3473f882001-02-23 17:55:21 +000012262 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012263 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012264 ctxt->sax->endDocument(ctxt->userData);
12265 }
12266 ctxt->instate = XML_PARSER_EOF;
12267 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012268 if (ctxt->wellFormed == 0)
12269 return((xmlParserErrors) ctxt->errNo);
12270 else
12271 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012272}
12273
12274/************************************************************************
12275 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012276 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012277 * *
12278 ************************************************************************/
12279
12280/**
Owen Taylor3473f882001-02-23 17:55:21 +000012281 * xmlCreatePushParserCtxt:
12282 * @sax: a SAX handler
12283 * @user_data: The user data returned on SAX callbacks
12284 * @chunk: a pointer to an array of chars
12285 * @size: number of chars in the array
12286 * @filename: an optional file name or URI
12287 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012288 * Create a parser context for using the XML parser in push mode.
12289 * If @buffer and @size are non-NULL, the data is used to detect
12290 * the encoding. The remaining characters will be parsed so they
12291 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012292 * To allow content encoding detection, @size should be >= 4
12293 * The value of @filename is used for fetching external entities
12294 * and error/warning reports.
12295 *
12296 * Returns the new parser context or NULL
12297 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012298
Owen Taylor3473f882001-02-23 17:55:21 +000012299xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012300xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012301 const char *chunk, int size, const char *filename) {
12302 xmlParserCtxtPtr ctxt;
12303 xmlParserInputPtr inputStream;
12304 xmlParserInputBufferPtr buf;
12305 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12306
12307 /*
12308 * plug some encoding conversion routines
12309 */
12310 if ((chunk != NULL) && (size >= 4))
12311 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12312
12313 buf = xmlAllocParserInputBuffer(enc);
12314 if (buf == NULL) return(NULL);
12315
12316 ctxt = xmlNewParserCtxt();
12317 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012318 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012319 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012320 return(NULL);
12321 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012322 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012323 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12324 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012325 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012326 xmlFreeParserInputBuffer(buf);
12327 xmlFreeParserCtxt(ctxt);
12328 return(NULL);
12329 }
Owen Taylor3473f882001-02-23 17:55:21 +000012330 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012331#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012332 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012333#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012334 xmlFree(ctxt->sax);
12335 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12336 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012337 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012338 xmlFreeParserInputBuffer(buf);
12339 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012340 return(NULL);
12341 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012342 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12343 if (sax->initialized == XML_SAX2_MAGIC)
12344 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12345 else
12346 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012347 if (user_data != NULL)
12348 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012349 }
Owen Taylor3473f882001-02-23 17:55:21 +000012350 if (filename == NULL) {
12351 ctxt->directory = NULL;
12352 } else {
12353 ctxt->directory = xmlParserGetDirectory(filename);
12354 }
12355
12356 inputStream = xmlNewInputStream(ctxt);
12357 if (inputStream == NULL) {
12358 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012359 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012360 return(NULL);
12361 }
12362
12363 if (filename == NULL)
12364 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012365 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012366 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012367 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012368 if (inputStream->filename == NULL) {
12369 xmlFreeParserCtxt(ctxt);
12370 xmlFreeParserInputBuffer(buf);
12371 return(NULL);
12372 }
12373 }
Owen Taylor3473f882001-02-23 17:55:21 +000012374 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012375 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012376 inputPush(ctxt, inputStream);
12377
William M. Brack3a1cd212005-02-11 14:35:54 +000012378 /*
12379 * If the caller didn't provide an initial 'chunk' for determining
12380 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12381 * that it can be automatically determined later
12382 */
12383 if ((size == 0) || (chunk == NULL)) {
12384 ctxt->charset = XML_CHAR_ENCODING_NONE;
12385 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012386 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12387 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012388
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012389 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012390
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012391 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012392#ifdef DEBUG_PUSH
12393 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12394#endif
12395 }
12396
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012397 if (enc != XML_CHAR_ENCODING_NONE) {
12398 xmlSwitchEncoding(ctxt, enc);
12399 }
12400
Owen Taylor3473f882001-02-23 17:55:21 +000012401 return(ctxt);
12402}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012403#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012404
12405/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012406 * xmlStopParser:
12407 * @ctxt: an XML parser context
12408 *
12409 * Blocks further parser processing
12410 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012411void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012412xmlStopParser(xmlParserCtxtPtr ctxt) {
12413 if (ctxt == NULL)
12414 return;
12415 ctxt->instate = XML_PARSER_EOF;
12416 ctxt->disableSAX = 1;
12417 if (ctxt->input != NULL) {
12418 ctxt->input->cur = BAD_CAST"";
12419 ctxt->input->base = ctxt->input->cur;
12420 }
12421}
12422
12423/**
Owen Taylor3473f882001-02-23 17:55:21 +000012424 * xmlCreateIOParserCtxt:
12425 * @sax: a SAX handler
12426 * @user_data: The user data returned on SAX callbacks
12427 * @ioread: an I/O read function
12428 * @ioclose: an I/O close function
12429 * @ioctx: an I/O handler
12430 * @enc: the charset encoding if known
12431 *
12432 * Create a parser context for using the XML parser with an existing
12433 * I/O stream
12434 *
12435 * Returns the new parser context or NULL
12436 */
12437xmlParserCtxtPtr
12438xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12439 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12440 void *ioctx, xmlCharEncoding enc) {
12441 xmlParserCtxtPtr ctxt;
12442 xmlParserInputPtr inputStream;
12443 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012444
Daniel Veillard42595322004-11-08 10:52:06 +000012445 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012446
12447 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012448 if (buf == NULL) {
12449 if (ioclose != NULL)
12450 ioclose(ioctx);
12451 return (NULL);
12452 }
Owen Taylor3473f882001-02-23 17:55:21 +000012453
12454 ctxt = xmlNewParserCtxt();
12455 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012456 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012457 return(NULL);
12458 }
12459 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012460#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012461 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012462#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012463 xmlFree(ctxt->sax);
12464 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12465 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012466 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012467 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012468 return(NULL);
12469 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012470 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12471 if (sax->initialized == XML_SAX2_MAGIC)
12472 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12473 else
12474 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012475 if (user_data != NULL)
12476 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012477 }
Owen Taylor3473f882001-02-23 17:55:21 +000012478
12479 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12480 if (inputStream == NULL) {
12481 xmlFreeParserCtxt(ctxt);
12482 return(NULL);
12483 }
12484 inputPush(ctxt, inputStream);
12485
12486 return(ctxt);
12487}
12488
Daniel Veillard4432df22003-09-28 18:58:27 +000012489#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012490/************************************************************************
12491 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012492 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012493 * *
12494 ************************************************************************/
12495
12496/**
12497 * xmlIOParseDTD:
12498 * @sax: the SAX handler block or NULL
12499 * @input: an Input Buffer
12500 * @enc: the charset encoding if known
12501 *
12502 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012503 *
Owen Taylor3473f882001-02-23 17:55:21 +000012504 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012505 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012506 */
12507
12508xmlDtdPtr
12509xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12510 xmlCharEncoding enc) {
12511 xmlDtdPtr ret = NULL;
12512 xmlParserCtxtPtr ctxt;
12513 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012514 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012515
12516 if (input == NULL)
12517 return(NULL);
12518
12519 ctxt = xmlNewParserCtxt();
12520 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012521 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012522 return(NULL);
12523 }
12524
12525 /*
12526 * Set-up the SAX context
12527 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012528 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012529 if (ctxt->sax != NULL)
12530 xmlFree(ctxt->sax);
12531 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012532 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012533 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012534 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012535
12536 /*
12537 * generate a parser input from the I/O handler
12538 */
12539
Daniel Veillard43caefb2003-12-07 19:32:22 +000012540 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012541 if (pinput == NULL) {
12542 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012543 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012544 xmlFreeParserCtxt(ctxt);
12545 return(NULL);
12546 }
12547
12548 /*
12549 * plug some encoding conversion routines here.
12550 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012551 if (xmlPushInput(ctxt, pinput) < 0) {
12552 if (sax != NULL) ctxt->sax = NULL;
12553 xmlFreeParserCtxt(ctxt);
12554 return(NULL);
12555 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012556 if (enc != XML_CHAR_ENCODING_NONE) {
12557 xmlSwitchEncoding(ctxt, enc);
12558 }
Owen Taylor3473f882001-02-23 17:55:21 +000012559
12560 pinput->filename = NULL;
12561 pinput->line = 1;
12562 pinput->col = 1;
12563 pinput->base = ctxt->input->cur;
12564 pinput->cur = ctxt->input->cur;
12565 pinput->free = NULL;
12566
12567 /*
12568 * let's parse that entity knowing it's an external subset.
12569 */
12570 ctxt->inSubset = 2;
12571 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012572 if (ctxt->myDoc == NULL) {
12573 xmlErrMemory(ctxt, "New Doc failed");
12574 return(NULL);
12575 }
12576 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012577 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12578 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012579
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012580 if ((enc == XML_CHAR_ENCODING_NONE) &&
12581 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012582 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012583 * Get the 4 first bytes and decode the charset
12584 * if enc != XML_CHAR_ENCODING_NONE
12585 * plug some encoding conversion routines.
12586 */
12587 start[0] = RAW;
12588 start[1] = NXT(1);
12589 start[2] = NXT(2);
12590 start[3] = NXT(3);
12591 enc = xmlDetectCharEncoding(start, 4);
12592 if (enc != XML_CHAR_ENCODING_NONE) {
12593 xmlSwitchEncoding(ctxt, enc);
12594 }
12595 }
12596
Owen Taylor3473f882001-02-23 17:55:21 +000012597 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12598
12599 if (ctxt->myDoc != NULL) {
12600 if (ctxt->wellFormed) {
12601 ret = ctxt->myDoc->extSubset;
12602 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012603 if (ret != NULL) {
12604 xmlNodePtr tmp;
12605
12606 ret->doc = NULL;
12607 tmp = ret->children;
12608 while (tmp != NULL) {
12609 tmp->doc = NULL;
12610 tmp = tmp->next;
12611 }
12612 }
Owen Taylor3473f882001-02-23 17:55:21 +000012613 } else {
12614 ret = NULL;
12615 }
12616 xmlFreeDoc(ctxt->myDoc);
12617 ctxt->myDoc = NULL;
12618 }
12619 if (sax != NULL) ctxt->sax = NULL;
12620 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012621
Owen Taylor3473f882001-02-23 17:55:21 +000012622 return(ret);
12623}
12624
12625/**
12626 * xmlSAXParseDTD:
12627 * @sax: the SAX handler block
12628 * @ExternalID: a NAME* containing the External ID of the DTD
12629 * @SystemID: a NAME* containing the URL to the DTD
12630 *
12631 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012632 *
Owen Taylor3473f882001-02-23 17:55:21 +000012633 * Returns the resulting xmlDtdPtr or NULL in case of error.
12634 */
12635
12636xmlDtdPtr
12637xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12638 const xmlChar *SystemID) {
12639 xmlDtdPtr ret = NULL;
12640 xmlParserCtxtPtr ctxt;
12641 xmlParserInputPtr input = NULL;
12642 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012643 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012644
12645 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12646
12647 ctxt = xmlNewParserCtxt();
12648 if (ctxt == NULL) {
12649 return(NULL);
12650 }
12651
12652 /*
12653 * Set-up the SAX context
12654 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012655 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012656 if (ctxt->sax != NULL)
12657 xmlFree(ctxt->sax);
12658 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012659 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012660 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012661
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012662 /*
12663 * Canonicalise the system ID
12664 */
12665 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012666 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012667 xmlFreeParserCtxt(ctxt);
12668 return(NULL);
12669 }
Owen Taylor3473f882001-02-23 17:55:21 +000012670
12671 /*
12672 * Ask the Entity resolver to load the damn thing
12673 */
12674
12675 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012676 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12677 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012678 if (input == NULL) {
12679 if (sax != NULL) ctxt->sax = NULL;
12680 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012681 if (systemIdCanonic != NULL)
12682 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012683 return(NULL);
12684 }
12685
12686 /*
12687 * plug some encoding conversion routines here.
12688 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012689 if (xmlPushInput(ctxt, input) < 0) {
12690 if (sax != NULL) ctxt->sax = NULL;
12691 xmlFreeParserCtxt(ctxt);
12692 if (systemIdCanonic != NULL)
12693 xmlFree(systemIdCanonic);
12694 return(NULL);
12695 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012696 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12697 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12698 xmlSwitchEncoding(ctxt, enc);
12699 }
Owen Taylor3473f882001-02-23 17:55:21 +000012700
12701 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012702 input->filename = (char *) systemIdCanonic;
12703 else
12704 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012705 input->line = 1;
12706 input->col = 1;
12707 input->base = ctxt->input->cur;
12708 input->cur = ctxt->input->cur;
12709 input->free = NULL;
12710
12711 /*
12712 * let's parse that entity knowing it's an external subset.
12713 */
12714 ctxt->inSubset = 2;
12715 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012716 if (ctxt->myDoc == NULL) {
12717 xmlErrMemory(ctxt, "New Doc failed");
12718 if (sax != NULL) ctxt->sax = NULL;
12719 xmlFreeParserCtxt(ctxt);
12720 return(NULL);
12721 }
12722 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012723 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12724 ExternalID, SystemID);
12725 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12726
12727 if (ctxt->myDoc != NULL) {
12728 if (ctxt->wellFormed) {
12729 ret = ctxt->myDoc->extSubset;
12730 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012731 if (ret != NULL) {
12732 xmlNodePtr tmp;
12733
12734 ret->doc = NULL;
12735 tmp = ret->children;
12736 while (tmp != NULL) {
12737 tmp->doc = NULL;
12738 tmp = tmp->next;
12739 }
12740 }
Owen Taylor3473f882001-02-23 17:55:21 +000012741 } else {
12742 ret = NULL;
12743 }
12744 xmlFreeDoc(ctxt->myDoc);
12745 ctxt->myDoc = NULL;
12746 }
12747 if (sax != NULL) ctxt->sax = NULL;
12748 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012749
Owen Taylor3473f882001-02-23 17:55:21 +000012750 return(ret);
12751}
12752
Daniel Veillard4432df22003-09-28 18:58:27 +000012753
Owen Taylor3473f882001-02-23 17:55:21 +000012754/**
12755 * xmlParseDTD:
12756 * @ExternalID: a NAME* containing the External ID of the DTD
12757 * @SystemID: a NAME* containing the URL to the DTD
12758 *
12759 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012760 *
Owen Taylor3473f882001-02-23 17:55:21 +000012761 * Returns the resulting xmlDtdPtr or NULL in case of error.
12762 */
12763
12764xmlDtdPtr
12765xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12766 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12767}
Daniel Veillard4432df22003-09-28 18:58:27 +000012768#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012769
12770/************************************************************************
12771 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012772 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012773 * *
12774 ************************************************************************/
12775
12776/**
Owen Taylor3473f882001-02-23 17:55:21 +000012777 * xmlParseCtxtExternalEntity:
12778 * @ctx: the existing parsing context
12779 * @URL: the URL for the entity to load
12780 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012781 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012782 *
12783 * Parse an external general entity within an existing parsing context
12784 * An external general parsed entity is well-formed if it matches the
12785 * production labeled extParsedEnt.
12786 *
12787 * [78] extParsedEnt ::= TextDecl? content
12788 *
12789 * Returns 0 if the entity is well formed, -1 in case of args problem and
12790 * the parser error code otherwise
12791 */
12792
12793int
12794xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012795 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012796 xmlParserCtxtPtr ctxt;
12797 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012798 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012799 xmlSAXHandlerPtr oldsax = NULL;
12800 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012801 xmlChar start[4];
12802 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012803
Daniel Veillardce682bc2004-11-05 17:22:25 +000012804 if (ctx == NULL) return(-1);
12805
Daniel Veillard0161e632008-08-28 15:36:32 +000012806 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12807 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012808 return(XML_ERR_ENTITY_LOOP);
12809 }
12810
Daniel Veillardcda96922001-08-21 10:56:31 +000012811 if (lst != NULL)
12812 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012813 if ((URL == NULL) && (ID == NULL))
12814 return(-1);
12815 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12816 return(-1);
12817
Rob Richards798743a2009-06-19 13:54:25 -040012818 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012819 if (ctxt == NULL) {
12820 return(-1);
12821 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012822
Owen Taylor3473f882001-02-23 17:55:21 +000012823 oldsax = ctxt->sax;
12824 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012825 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012826 newDoc = xmlNewDoc(BAD_CAST "1.0");
12827 if (newDoc == NULL) {
12828 xmlFreeParserCtxt(ctxt);
12829 return(-1);
12830 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012831 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012832 if (ctx->myDoc->dict) {
12833 newDoc->dict = ctx->myDoc->dict;
12834 xmlDictReference(newDoc->dict);
12835 }
Owen Taylor3473f882001-02-23 17:55:21 +000012836 if (ctx->myDoc != NULL) {
12837 newDoc->intSubset = ctx->myDoc->intSubset;
12838 newDoc->extSubset = ctx->myDoc->extSubset;
12839 }
12840 if (ctx->myDoc->URL != NULL) {
12841 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12842 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012843 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12844 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012845 ctxt->sax = oldsax;
12846 xmlFreeParserCtxt(ctxt);
12847 newDoc->intSubset = NULL;
12848 newDoc->extSubset = NULL;
12849 xmlFreeDoc(newDoc);
12850 return(-1);
12851 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012852 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012853 nodePush(ctxt, newDoc->children);
12854 if (ctx->myDoc == NULL) {
12855 ctxt->myDoc = newDoc;
12856 } else {
12857 ctxt->myDoc = ctx->myDoc;
12858 newDoc->children->doc = ctx->myDoc;
12859 }
12860
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012861 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012862 * Get the 4 first bytes and decode the charset
12863 * if enc != XML_CHAR_ENCODING_NONE
12864 * plug some encoding conversion routines.
12865 */
12866 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012867 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12868 start[0] = RAW;
12869 start[1] = NXT(1);
12870 start[2] = NXT(2);
12871 start[3] = NXT(3);
12872 enc = xmlDetectCharEncoding(start, 4);
12873 if (enc != XML_CHAR_ENCODING_NONE) {
12874 xmlSwitchEncoding(ctxt, enc);
12875 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012876 }
12877
Owen Taylor3473f882001-02-23 17:55:21 +000012878 /*
12879 * Parse a possible text declaration first
12880 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012881 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012882 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012883 /*
12884 * An XML-1.0 document can't reference an entity not XML-1.0
12885 */
12886 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12887 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012888 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012889 "Version mismatch between document and entity\n");
12890 }
Owen Taylor3473f882001-02-23 17:55:21 +000012891 }
12892
12893 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012894 * If the user provided its own SAX callbacks then reuse the
12895 * useData callback field, otherwise the expected setup in a
12896 * DOM builder is to have userData == ctxt
12897 */
12898 if (ctx->userData == ctx)
12899 ctxt->userData = ctxt;
12900 else
12901 ctxt->userData = ctx->userData;
12902
12903 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012904 * Doing validity checking on chunk doesn't make sense
12905 */
12906 ctxt->instate = XML_PARSER_CONTENT;
12907 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012908 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012909 ctxt->loadsubset = ctx->loadsubset;
12910 ctxt->depth = ctx->depth + 1;
12911 ctxt->replaceEntities = ctx->replaceEntities;
12912 if (ctxt->validate) {
12913 ctxt->vctxt.error = ctx->vctxt.error;
12914 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012915 } else {
12916 ctxt->vctxt.error = NULL;
12917 ctxt->vctxt.warning = NULL;
12918 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012919 ctxt->vctxt.nodeTab = NULL;
12920 ctxt->vctxt.nodeNr = 0;
12921 ctxt->vctxt.nodeMax = 0;
12922 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012923 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12924 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012925 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12926 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12927 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012928 ctxt->dictNames = ctx->dictNames;
12929 ctxt->attsDefault = ctx->attsDefault;
12930 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012931 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012932
12933 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012934
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012935 ctx->validate = ctxt->validate;
12936 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012937 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012938 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012939 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012940 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012941 }
12942 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012943 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012944 }
12945
12946 if (!ctxt->wellFormed) {
12947 if (ctxt->errNo == 0)
12948 ret = 1;
12949 else
12950 ret = ctxt->errNo;
12951 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012952 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012953 xmlNodePtr cur;
12954
12955 /*
12956 * Return the newly created nodeset after unlinking it from
12957 * they pseudo parent.
12958 */
12959 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012960 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012961 while (cur != NULL) {
12962 cur->parent = NULL;
12963 cur = cur->next;
12964 }
12965 newDoc->children->children = NULL;
12966 }
12967 ret = 0;
12968 }
12969 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012970 ctxt->dict = NULL;
12971 ctxt->attsDefault = NULL;
12972 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012973 xmlFreeParserCtxt(ctxt);
12974 newDoc->intSubset = NULL;
12975 newDoc->extSubset = NULL;
12976 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012977
Owen Taylor3473f882001-02-23 17:55:21 +000012978 return(ret);
12979}
12980
12981/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012982 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012983 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012984 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012985 * @sax: the SAX handler bloc (possibly NULL)
12986 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12987 * @depth: Used for loop detection, use 0
12988 * @URL: the URL for the entity to load
12989 * @ID: the System ID for the entity to load
12990 * @list: the return value for the set of parsed nodes
12991 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012992 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012993 *
12994 * Returns 0 if the entity is well formed, -1 in case of args problem and
12995 * the parser error code otherwise
12996 */
12997
Daniel Veillard7d515752003-09-26 19:12:37 +000012998static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012999xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13000 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013001 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013002 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013003 xmlParserCtxtPtr ctxt;
13004 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013005 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013006 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013007 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013008 xmlChar start[4];
13009 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013010
Daniel Veillard0161e632008-08-28 15:36:32 +000013011 if (((depth > 40) &&
13012 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13013 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013014 return(XML_ERR_ENTITY_LOOP);
13015 }
13016
Owen Taylor3473f882001-02-23 17:55:21 +000013017 if (list != NULL)
13018 *list = NULL;
13019 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013020 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013021 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013022 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013023
13024
Rob Richards9c0aa472009-03-26 18:10:19 +000013025 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013026 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013027 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013028 if (oldctxt != NULL) {
13029 ctxt->_private = oldctxt->_private;
13030 ctxt->loadsubset = oldctxt->loadsubset;
13031 ctxt->validate = oldctxt->validate;
13032 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013033 ctxt->record_info = oldctxt->record_info;
13034 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13035 ctxt->node_seq.length = oldctxt->node_seq.length;
13036 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013037 } else {
13038 /*
13039 * Doing validity checking on chunk without context
13040 * doesn't make sense
13041 */
13042 ctxt->_private = NULL;
13043 ctxt->validate = 0;
13044 ctxt->external = 2;
13045 ctxt->loadsubset = 0;
13046 }
Owen Taylor3473f882001-02-23 17:55:21 +000013047 if (sax != NULL) {
13048 oldsax = ctxt->sax;
13049 ctxt->sax = sax;
13050 if (user_data != NULL)
13051 ctxt->userData = user_data;
13052 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013053 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013054 newDoc = xmlNewDoc(BAD_CAST "1.0");
13055 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013056 ctxt->node_seq.maximum = 0;
13057 ctxt->node_seq.length = 0;
13058 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013059 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013060 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013061 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013062 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013063 newDoc->intSubset = doc->intSubset;
13064 newDoc->extSubset = doc->extSubset;
13065 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013066 xmlDictReference(newDoc->dict);
13067
Owen Taylor3473f882001-02-23 17:55:21 +000013068 if (doc->URL != NULL) {
13069 newDoc->URL = xmlStrdup(doc->URL);
13070 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013071 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13072 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013073 if (sax != NULL)
13074 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013075 ctxt->node_seq.maximum = 0;
13076 ctxt->node_seq.length = 0;
13077 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013078 xmlFreeParserCtxt(ctxt);
13079 newDoc->intSubset = NULL;
13080 newDoc->extSubset = NULL;
13081 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013082 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013083 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013084 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013085 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013086 ctxt->myDoc = doc;
13087 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013088
Daniel Veillard0161e632008-08-28 15:36:32 +000013089 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013090 * Get the 4 first bytes and decode the charset
13091 * if enc != XML_CHAR_ENCODING_NONE
13092 * plug some encoding conversion routines.
13093 */
13094 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013095 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13096 start[0] = RAW;
13097 start[1] = NXT(1);
13098 start[2] = NXT(2);
13099 start[3] = NXT(3);
13100 enc = xmlDetectCharEncoding(start, 4);
13101 if (enc != XML_CHAR_ENCODING_NONE) {
13102 xmlSwitchEncoding(ctxt, enc);
13103 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013104 }
13105
Owen Taylor3473f882001-02-23 17:55:21 +000013106 /*
13107 * Parse a possible text declaration first
13108 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013109 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013110 xmlParseTextDecl(ctxt);
13111 }
13112
Owen Taylor3473f882001-02-23 17:55:21 +000013113 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013114 ctxt->depth = depth;
13115
13116 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013117
Daniel Veillard561b7f82002-03-20 21:55:57 +000013118 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013119 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013120 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013121 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013122 }
13123 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013124 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013125 }
13126
13127 if (!ctxt->wellFormed) {
13128 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013129 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013130 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013131 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013132 } else {
13133 if (list != NULL) {
13134 xmlNodePtr cur;
13135
13136 /*
13137 * Return the newly created nodeset after unlinking it from
13138 * they pseudo parent.
13139 */
13140 cur = newDoc->children->children;
13141 *list = cur;
13142 while (cur != NULL) {
13143 cur->parent = NULL;
13144 cur = cur->next;
13145 }
13146 newDoc->children->children = NULL;
13147 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013148 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013149 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013150
13151 /*
13152 * Record in the parent context the number of entities replacement
13153 * done when parsing that reference.
13154 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013155 if (oldctxt != NULL)
13156 oldctxt->nbentities += ctxt->nbentities;
13157
Daniel Veillard0161e632008-08-28 15:36:32 +000013158 /*
13159 * Also record the size of the entity parsed
13160 */
13161 if (ctxt->input != NULL) {
13162 oldctxt->sizeentities += ctxt->input->consumed;
13163 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13164 }
13165 /*
13166 * And record the last error if any
13167 */
13168 if (ctxt->lastError.code != XML_ERR_OK)
13169 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13170
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013171 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013172 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013173 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13174 oldctxt->node_seq.length = ctxt->node_seq.length;
13175 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013176 ctxt->node_seq.maximum = 0;
13177 ctxt->node_seq.length = 0;
13178 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013179 xmlFreeParserCtxt(ctxt);
13180 newDoc->intSubset = NULL;
13181 newDoc->extSubset = NULL;
13182 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013183
Owen Taylor3473f882001-02-23 17:55:21 +000013184 return(ret);
13185}
13186
Daniel Veillard81273902003-09-30 00:43:48 +000013187#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013188/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013189 * xmlParseExternalEntity:
13190 * @doc: the document the chunk pertains to
13191 * @sax: the SAX handler bloc (possibly NULL)
13192 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13193 * @depth: Used for loop detection, use 0
13194 * @URL: the URL for the entity to load
13195 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013196 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013197 *
13198 * Parse an external general entity
13199 * An external general parsed entity is well-formed if it matches the
13200 * production labeled extParsedEnt.
13201 *
13202 * [78] extParsedEnt ::= TextDecl? content
13203 *
13204 * Returns 0 if the entity is well formed, -1 in case of args problem and
13205 * the parser error code otherwise
13206 */
13207
13208int
13209xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013210 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013211 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013212 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013213}
13214
13215/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013216 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013217 * @doc: the document the chunk pertains to
13218 * @sax: the SAX handler bloc (possibly NULL)
13219 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13220 * @depth: Used for loop detection, use 0
13221 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013222 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013223 *
13224 * Parse a well-balanced chunk of an XML document
13225 * called by the parser
13226 * The allowed sequence for the Well Balanced Chunk is the one defined by
13227 * the content production in the XML grammar:
13228 *
13229 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13230 *
13231 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13232 * the parser error code otherwise
13233 */
13234
13235int
13236xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013237 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013238 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13239 depth, string, lst, 0 );
13240}
Daniel Veillard81273902003-09-30 00:43:48 +000013241#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013242
13243/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013244 * xmlParseBalancedChunkMemoryInternal:
13245 * @oldctxt: the existing parsing context
13246 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13247 * @user_data: the user data field for the parser context
13248 * @lst: the return value for the set of parsed nodes
13249 *
13250 *
13251 * Parse a well-balanced chunk of an XML document
13252 * called by the parser
13253 * The allowed sequence for the Well Balanced Chunk is the one defined by
13254 * the content production in the XML grammar:
13255 *
13256 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13257 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013258 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13259 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013260 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013261 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013262 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013263 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013264static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013265xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13266 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13267 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013268 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013269 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013270 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013271 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013272 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013273 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013274 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013275#ifdef SAX2
13276 int i;
13277#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013278
Daniel Veillard0161e632008-08-28 15:36:32 +000013279 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13280 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013281 return(XML_ERR_ENTITY_LOOP);
13282 }
13283
13284
13285 if (lst != NULL)
13286 *lst = NULL;
13287 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013288 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013289
13290 size = xmlStrlen(string);
13291
13292 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013293 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013294 if (user_data != NULL)
13295 ctxt->userData = user_data;
13296 else
13297 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013298 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13299 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013300 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13301 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13302 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013303
Daniel Veillard74eaec12009-08-26 15:57:20 +020013304#ifdef SAX2
13305 /* propagate namespaces down the entity */
13306 for (i = 0;i < oldctxt->nsNr;i += 2) {
13307 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13308 }
13309#endif
13310
Daniel Veillard328f48c2002-11-15 15:24:34 +000013311 oldsax = ctxt->sax;
13312 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013313 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013314 ctxt->replaceEntities = oldctxt->replaceEntities;
13315 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013316
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013317 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013318 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013319 newDoc = xmlNewDoc(BAD_CAST "1.0");
13320 if (newDoc == NULL) {
13321 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013322 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013323 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013324 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013325 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013326 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013327 newDoc->dict = ctxt->dict;
13328 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013329 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013330 } else {
13331 ctxt->myDoc = oldctxt->myDoc;
13332 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013333 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013334 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013335 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13336 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013337 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013338 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013339 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013340 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013341 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013342 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013343 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013344 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013345 ctxt->myDoc->children = NULL;
13346 ctxt->myDoc->last = NULL;
13347 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013348 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013349 ctxt->instate = XML_PARSER_CONTENT;
13350 ctxt->depth = oldctxt->depth + 1;
13351
Daniel Veillard328f48c2002-11-15 15:24:34 +000013352 ctxt->validate = 0;
13353 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013354 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13355 /*
13356 * ID/IDREF registration will be done in xmlValidateElement below
13357 */
13358 ctxt->loadsubset |= XML_SKIP_IDS;
13359 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013360 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013361 ctxt->attsDefault = oldctxt->attsDefault;
13362 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013363
Daniel Veillard68e9e742002-11-16 15:35:11 +000013364 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013365 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013366 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013367 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013368 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013369 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013370 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013371 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013372 }
13373
13374 if (!ctxt->wellFormed) {
13375 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013376 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013377 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013378 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013379 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013380 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013381 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013382
William M. Brack7b9154b2003-09-27 19:23:50 +000013383 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013384 xmlNodePtr cur;
13385
13386 /*
13387 * Return the newly created nodeset after unlinking it from
13388 * they pseudo parent.
13389 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013390 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013391 *lst = cur;
13392 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013393#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013394 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13395 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13396 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013397 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13398 oldctxt->myDoc, cur);
13399 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013400#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013401 cur->parent = NULL;
13402 cur = cur->next;
13403 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013404 ctxt->myDoc->children->children = NULL;
13405 }
13406 if (ctxt->myDoc != NULL) {
13407 xmlFreeNode(ctxt->myDoc->children);
13408 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013409 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013410 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013411
13412 /*
13413 * Record in the parent context the number of entities replacement
13414 * done when parsing that reference.
13415 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013416 if (oldctxt != NULL)
13417 oldctxt->nbentities += ctxt->nbentities;
13418
Daniel Veillard0161e632008-08-28 15:36:32 +000013419 /*
13420 * Also record the last error if any
13421 */
13422 if (ctxt->lastError.code != XML_ERR_OK)
13423 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13424
Daniel Veillard328f48c2002-11-15 15:24:34 +000013425 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013426 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013427 ctxt->attsDefault = NULL;
13428 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013429 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013430 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013431 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013432 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013433
Daniel Veillard328f48c2002-11-15 15:24:34 +000013434 return(ret);
13435}
13436
Daniel Veillard29b17482004-08-16 00:39:03 +000013437/**
13438 * xmlParseInNodeContext:
13439 * @node: the context node
13440 * @data: the input string
13441 * @datalen: the input string length in bytes
13442 * @options: a combination of xmlParserOption
13443 * @lst: the return value for the set of parsed nodes
13444 *
13445 * Parse a well-balanced chunk of an XML document
13446 * within the context (DTD, namespaces, etc ...) of the given node.
13447 *
13448 * The allowed sequence for the data is a Well Balanced Chunk defined by
13449 * the content production in the XML grammar:
13450 *
13451 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13452 *
13453 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13454 * error code otherwise
13455 */
13456xmlParserErrors
13457xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13458 int options, xmlNodePtr *lst) {
13459#ifdef SAX2
13460 xmlParserCtxtPtr ctxt;
13461 xmlDocPtr doc = NULL;
13462 xmlNodePtr fake, cur;
13463 int nsnr = 0;
13464
13465 xmlParserErrors ret = XML_ERR_OK;
13466
13467 /*
13468 * check all input parameters, grab the document
13469 */
13470 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13471 return(XML_ERR_INTERNAL_ERROR);
13472 switch (node->type) {
13473 case XML_ELEMENT_NODE:
13474 case XML_ATTRIBUTE_NODE:
13475 case XML_TEXT_NODE:
13476 case XML_CDATA_SECTION_NODE:
13477 case XML_ENTITY_REF_NODE:
13478 case XML_PI_NODE:
13479 case XML_COMMENT_NODE:
13480 case XML_DOCUMENT_NODE:
13481 case XML_HTML_DOCUMENT_NODE:
13482 break;
13483 default:
13484 return(XML_ERR_INTERNAL_ERROR);
13485
13486 }
13487 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13488 (node->type != XML_DOCUMENT_NODE) &&
13489 (node->type != XML_HTML_DOCUMENT_NODE))
13490 node = node->parent;
13491 if (node == NULL)
13492 return(XML_ERR_INTERNAL_ERROR);
13493 if (node->type == XML_ELEMENT_NODE)
13494 doc = node->doc;
13495 else
13496 doc = (xmlDocPtr) node;
13497 if (doc == NULL)
13498 return(XML_ERR_INTERNAL_ERROR);
13499
13500 /*
13501 * allocate a context and set-up everything not related to the
13502 * node position in the tree
13503 */
13504 if (doc->type == XML_DOCUMENT_NODE)
13505 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13506#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013507 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013508 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013509 /*
13510 * When parsing in context, it makes no sense to add implied
13511 * elements like html/body/etc...
13512 */
13513 options |= HTML_PARSE_NOIMPLIED;
13514 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013515#endif
13516 else
13517 return(XML_ERR_INTERNAL_ERROR);
13518
13519 if (ctxt == NULL)
13520 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013521
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013522 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013523 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13524 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13525 * we must wait until the last moment to free the original one.
13526 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013527 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013528 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013529 xmlDictFree(ctxt->dict);
13530 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013531 } else
13532 options |= XML_PARSE_NODICT;
13533
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013534 if (doc->encoding != NULL) {
13535 xmlCharEncodingHandlerPtr hdlr;
13536
13537 if (ctxt->encoding != NULL)
13538 xmlFree((xmlChar *) ctxt->encoding);
13539 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13540
13541 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13542 if (hdlr != NULL) {
13543 xmlSwitchToEncoding(ctxt, hdlr);
13544 } else {
13545 return(XML_ERR_UNSUPPORTED_ENCODING);
13546 }
13547 }
13548
Daniel Veillard37334572008-07-31 08:20:02 +000013549 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013550 xmlDetectSAX2(ctxt);
13551 ctxt->myDoc = doc;
13552
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013553 fake = xmlNewComment(NULL);
13554 if (fake == NULL) {
13555 xmlFreeParserCtxt(ctxt);
13556 return(XML_ERR_NO_MEMORY);
13557 }
13558 xmlAddChild(node, fake);
13559
Daniel Veillard29b17482004-08-16 00:39:03 +000013560 if (node->type == XML_ELEMENT_NODE) {
13561 nodePush(ctxt, node);
13562 /*
13563 * initialize the SAX2 namespaces stack
13564 */
13565 cur = node;
13566 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13567 xmlNsPtr ns = cur->nsDef;
13568 const xmlChar *iprefix, *ihref;
13569
13570 while (ns != NULL) {
13571 if (ctxt->dict) {
13572 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13573 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13574 } else {
13575 iprefix = ns->prefix;
13576 ihref = ns->href;
13577 }
13578
13579 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13580 nsPush(ctxt, iprefix, ihref);
13581 nsnr++;
13582 }
13583 ns = ns->next;
13584 }
13585 cur = cur->parent;
13586 }
13587 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013588 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013589
13590 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13591 /*
13592 * ID/IDREF registration will be done in xmlValidateElement below
13593 */
13594 ctxt->loadsubset |= XML_SKIP_IDS;
13595 }
13596
Daniel Veillard499cc922006-01-18 17:22:35 +000013597#ifdef LIBXML_HTML_ENABLED
13598 if (doc->type == XML_HTML_DOCUMENT_NODE)
13599 __htmlParseContent(ctxt);
13600 else
13601#endif
13602 xmlParseContent(ctxt);
13603
Daniel Veillard29b17482004-08-16 00:39:03 +000013604 nsPop(ctxt, nsnr);
13605 if ((RAW == '<') && (NXT(1) == '/')) {
13606 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13607 } else if (RAW != 0) {
13608 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13609 }
13610 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13611 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13612 ctxt->wellFormed = 0;
13613 }
13614
13615 if (!ctxt->wellFormed) {
13616 if (ctxt->errNo == 0)
13617 ret = XML_ERR_INTERNAL_ERROR;
13618 else
13619 ret = (xmlParserErrors)ctxt->errNo;
13620 } else {
13621 ret = XML_ERR_OK;
13622 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013623
Daniel Veillard29b17482004-08-16 00:39:03 +000013624 /*
13625 * Return the newly created nodeset after unlinking it from
13626 * the pseudo sibling.
13627 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013628
Daniel Veillard29b17482004-08-16 00:39:03 +000013629 cur = fake->next;
13630 fake->next = NULL;
13631 node->last = fake;
13632
13633 if (cur != NULL) {
13634 cur->prev = NULL;
13635 }
13636
13637 *lst = cur;
13638
13639 while (cur != NULL) {
13640 cur->parent = NULL;
13641 cur = cur->next;
13642 }
13643
13644 xmlUnlinkNode(fake);
13645 xmlFreeNode(fake);
13646
13647
13648 if (ret != XML_ERR_OK) {
13649 xmlFreeNodeList(*lst);
13650 *lst = NULL;
13651 }
William M. Brackc3f81342004-10-03 01:22:44 +000013652
William M. Brackb7b54de2004-10-06 16:38:01 +000013653 if (doc->dict != NULL)
13654 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013655 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013656
Daniel Veillard29b17482004-08-16 00:39:03 +000013657 return(ret);
13658#else /* !SAX2 */
13659 return(XML_ERR_INTERNAL_ERROR);
13660#endif
13661}
13662
Daniel Veillard81273902003-09-30 00:43:48 +000013663#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013664/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013665 * xmlParseBalancedChunkMemoryRecover:
13666 * @doc: the document the chunk pertains to
13667 * @sax: the SAX handler bloc (possibly NULL)
13668 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13669 * @depth: Used for loop detection, use 0
13670 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13671 * @lst: the return value for the set of parsed nodes
13672 * @recover: return nodes even if the data is broken (use 0)
13673 *
13674 *
13675 * Parse a well-balanced chunk of an XML document
13676 * called by the parser
13677 * The allowed sequence for the Well Balanced Chunk is the one defined by
13678 * the content production in the XML grammar:
13679 *
13680 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13681 *
13682 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13683 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013684 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013685 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013686 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13687 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013688 */
13689int
13690xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013691 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013692 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013693 xmlParserCtxtPtr ctxt;
13694 xmlDocPtr newDoc;
13695 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013696 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013697 int size;
13698 int ret = 0;
13699
Daniel Veillard0161e632008-08-28 15:36:32 +000013700 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013701 return(XML_ERR_ENTITY_LOOP);
13702 }
13703
13704
Daniel Veillardcda96922001-08-21 10:56:31 +000013705 if (lst != NULL)
13706 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013707 if (string == NULL)
13708 return(-1);
13709
13710 size = xmlStrlen(string);
13711
13712 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13713 if (ctxt == NULL) return(-1);
13714 ctxt->userData = ctxt;
13715 if (sax != NULL) {
13716 oldsax = ctxt->sax;
13717 ctxt->sax = sax;
13718 if (user_data != NULL)
13719 ctxt->userData = user_data;
13720 }
13721 newDoc = xmlNewDoc(BAD_CAST "1.0");
13722 if (newDoc == NULL) {
13723 xmlFreeParserCtxt(ctxt);
13724 return(-1);
13725 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013726 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013727 if ((doc != NULL) && (doc->dict != NULL)) {
13728 xmlDictFree(ctxt->dict);
13729 ctxt->dict = doc->dict;
13730 xmlDictReference(ctxt->dict);
13731 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13732 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13733 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13734 ctxt->dictNames = 1;
13735 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013736 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013737 }
Owen Taylor3473f882001-02-23 17:55:21 +000013738 if (doc != NULL) {
13739 newDoc->intSubset = doc->intSubset;
13740 newDoc->extSubset = doc->extSubset;
13741 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013742 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13743 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013744 if (sax != NULL)
13745 ctxt->sax = oldsax;
13746 xmlFreeParserCtxt(ctxt);
13747 newDoc->intSubset = NULL;
13748 newDoc->extSubset = NULL;
13749 xmlFreeDoc(newDoc);
13750 return(-1);
13751 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013752 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13753 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013754 if (doc == NULL) {
13755 ctxt->myDoc = newDoc;
13756 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013757 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013758 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013759 /* Ensure that doc has XML spec namespace */
13760 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13761 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013762 }
13763 ctxt->instate = XML_PARSER_CONTENT;
13764 ctxt->depth = depth;
13765
13766 /*
13767 * Doing validity checking on chunk doesn't make sense
13768 */
13769 ctxt->validate = 0;
13770 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013771 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013772
Daniel Veillardb39bc392002-10-26 19:29:51 +000013773 if ( doc != NULL ){
13774 content = doc->children;
13775 doc->children = NULL;
13776 xmlParseContent(ctxt);
13777 doc->children = content;
13778 }
13779 else {
13780 xmlParseContent(ctxt);
13781 }
Owen Taylor3473f882001-02-23 17:55:21 +000013782 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013783 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013784 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013785 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013786 }
13787 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013788 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013789 }
13790
13791 if (!ctxt->wellFormed) {
13792 if (ctxt->errNo == 0)
13793 ret = 1;
13794 else
13795 ret = ctxt->errNo;
13796 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013797 ret = 0;
13798 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013799
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013800 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13801 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013802
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013803 /*
13804 * Return the newly created nodeset after unlinking it from
13805 * they pseudo parent.
13806 */
13807 cur = newDoc->children->children;
13808 *lst = cur;
13809 while (cur != NULL) {
13810 xmlSetTreeDoc(cur, doc);
13811 cur->parent = NULL;
13812 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013813 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013814 newDoc->children->children = NULL;
13815 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013816
13817 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013818 ctxt->sax = oldsax;
13819 xmlFreeParserCtxt(ctxt);
13820 newDoc->intSubset = NULL;
13821 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013822 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013823 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013824
Owen Taylor3473f882001-02-23 17:55:21 +000013825 return(ret);
13826}
13827
13828/**
13829 * xmlSAXParseEntity:
13830 * @sax: the SAX handler block
13831 * @filename: the filename
13832 *
13833 * parse an XML external entity out of context and build a tree.
13834 * It use the given SAX function block to handle the parsing callback.
13835 * If sax is NULL, fallback to the default DOM tree building routines.
13836 *
13837 * [78] extParsedEnt ::= TextDecl? content
13838 *
13839 * This correspond to a "Well Balanced" chunk
13840 *
13841 * Returns the resulting document tree
13842 */
13843
13844xmlDocPtr
13845xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13846 xmlDocPtr ret;
13847 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013848
13849 ctxt = xmlCreateFileParserCtxt(filename);
13850 if (ctxt == NULL) {
13851 return(NULL);
13852 }
13853 if (sax != NULL) {
13854 if (ctxt->sax != NULL)
13855 xmlFree(ctxt->sax);
13856 ctxt->sax = sax;
13857 ctxt->userData = NULL;
13858 }
13859
Owen Taylor3473f882001-02-23 17:55:21 +000013860 xmlParseExtParsedEnt(ctxt);
13861
13862 if (ctxt->wellFormed)
13863 ret = ctxt->myDoc;
13864 else {
13865 ret = NULL;
13866 xmlFreeDoc(ctxt->myDoc);
13867 ctxt->myDoc = NULL;
13868 }
13869 if (sax != NULL)
13870 ctxt->sax = NULL;
13871 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013872
Owen Taylor3473f882001-02-23 17:55:21 +000013873 return(ret);
13874}
13875
13876/**
13877 * xmlParseEntity:
13878 * @filename: the filename
13879 *
13880 * parse an XML external entity out of context and build a tree.
13881 *
13882 * [78] extParsedEnt ::= TextDecl? content
13883 *
13884 * This correspond to a "Well Balanced" chunk
13885 *
13886 * Returns the resulting document tree
13887 */
13888
13889xmlDocPtr
13890xmlParseEntity(const char *filename) {
13891 return(xmlSAXParseEntity(NULL, filename));
13892}
Daniel Veillard81273902003-09-30 00:43:48 +000013893#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013894
13895/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013896 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013897 * @URL: the entity URL
13898 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013899 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013900 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013901 *
13902 * Create a parser context for an external entity
13903 * Automatic support for ZLIB/Compress compressed document is provided
13904 * by default if found at compile-time.
13905 *
13906 * Returns the new parser context or NULL
13907 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013908static xmlParserCtxtPtr
13909xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13910 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013911 xmlParserCtxtPtr ctxt;
13912 xmlParserInputPtr inputStream;
13913 char *directory = NULL;
13914 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013915
Owen Taylor3473f882001-02-23 17:55:21 +000013916 ctxt = xmlNewParserCtxt();
13917 if (ctxt == NULL) {
13918 return(NULL);
13919 }
13920
Daniel Veillard48247b42009-07-10 16:12:46 +020013921 if (pctx != NULL) {
13922 ctxt->options = pctx->options;
13923 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013924 }
13925
Owen Taylor3473f882001-02-23 17:55:21 +000013926 uri = xmlBuildURI(URL, base);
13927
13928 if (uri == NULL) {
13929 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13930 if (inputStream == NULL) {
13931 xmlFreeParserCtxt(ctxt);
13932 return(NULL);
13933 }
13934
13935 inputPush(ctxt, inputStream);
13936
13937 if ((ctxt->directory == NULL) && (directory == NULL))
13938 directory = xmlParserGetDirectory((char *)URL);
13939 if ((ctxt->directory == NULL) && (directory != NULL))
13940 ctxt->directory = directory;
13941 } else {
13942 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13943 if (inputStream == NULL) {
13944 xmlFree(uri);
13945 xmlFreeParserCtxt(ctxt);
13946 return(NULL);
13947 }
13948
13949 inputPush(ctxt, inputStream);
13950
13951 if ((ctxt->directory == NULL) && (directory == NULL))
13952 directory = xmlParserGetDirectory((char *)uri);
13953 if ((ctxt->directory == NULL) && (directory != NULL))
13954 ctxt->directory = directory;
13955 xmlFree(uri);
13956 }
Owen Taylor3473f882001-02-23 17:55:21 +000013957 return(ctxt);
13958}
13959
Rob Richards9c0aa472009-03-26 18:10:19 +000013960/**
13961 * xmlCreateEntityParserCtxt:
13962 * @URL: the entity URL
13963 * @ID: the entity PUBLIC ID
13964 * @base: a possible base for the target URI
13965 *
13966 * Create a parser context for an external entity
13967 * Automatic support for ZLIB/Compress compressed document is provided
13968 * by default if found at compile-time.
13969 *
13970 * Returns the new parser context or NULL
13971 */
13972xmlParserCtxtPtr
13973xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13974 const xmlChar *base) {
13975 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13976
13977}
13978
Owen Taylor3473f882001-02-23 17:55:21 +000013979/************************************************************************
13980 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013981 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013982 * *
13983 ************************************************************************/
13984
13985/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013986 * xmlCreateURLParserCtxt:
13987 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013988 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013989 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013990 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013991 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013992 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013993 *
13994 * Returns the new parser context or NULL
13995 */
13996xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013997xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013998{
13999 xmlParserCtxtPtr ctxt;
14000 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014001 char *directory = NULL;
14002
Owen Taylor3473f882001-02-23 17:55:21 +000014003 ctxt = xmlNewParserCtxt();
14004 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014005 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014006 return(NULL);
14007 }
14008
Daniel Veillarddf292f72005-01-16 19:00:15 +000014009 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014010 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014011 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014012
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014013 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014014 if (inputStream == NULL) {
14015 xmlFreeParserCtxt(ctxt);
14016 return(NULL);
14017 }
14018
Owen Taylor3473f882001-02-23 17:55:21 +000014019 inputPush(ctxt, inputStream);
14020 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014021 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014022 if ((ctxt->directory == NULL) && (directory != NULL))
14023 ctxt->directory = directory;
14024
14025 return(ctxt);
14026}
14027
Daniel Veillard61b93382003-11-03 14:28:31 +000014028/**
14029 * xmlCreateFileParserCtxt:
14030 * @filename: the filename
14031 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014032 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014033 * Automatic support for ZLIB/Compress compressed document is provided
14034 * by default if found at compile-time.
14035 *
14036 * Returns the new parser context or NULL
14037 */
14038xmlParserCtxtPtr
14039xmlCreateFileParserCtxt(const char *filename)
14040{
14041 return(xmlCreateURLParserCtxt(filename, 0));
14042}
14043
Daniel Veillard81273902003-09-30 00:43:48 +000014044#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014045/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014046 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014047 * @sax: the SAX handler block
14048 * @filename: the filename
14049 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14050 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014051 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014052 *
14053 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14054 * compressed document is provided by default if found at compile-time.
14055 * It use the given SAX function block to handle the parsing callback.
14056 * If sax is NULL, fallback to the default DOM tree building routines.
14057 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014058 * User data (void *) is stored within the parser context in the
14059 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014060 *
Owen Taylor3473f882001-02-23 17:55:21 +000014061 * Returns the resulting document tree
14062 */
14063
14064xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014065xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14066 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014067 xmlDocPtr ret;
14068 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014069
Daniel Veillard635ef722001-10-29 11:48:19 +000014070 xmlInitParser();
14071
Owen Taylor3473f882001-02-23 17:55:21 +000014072 ctxt = xmlCreateFileParserCtxt(filename);
14073 if (ctxt == NULL) {
14074 return(NULL);
14075 }
14076 if (sax != NULL) {
14077 if (ctxt->sax != NULL)
14078 xmlFree(ctxt->sax);
14079 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014080 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014081 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014082 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014083 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014084 }
Owen Taylor3473f882001-02-23 17:55:21 +000014085
Daniel Veillard37d2d162008-03-14 10:54:00 +000014086 if (ctxt->directory == NULL)
14087 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014088
Daniel Veillarddad3f682002-11-17 16:47:27 +000014089 ctxt->recovery = recovery;
14090
Owen Taylor3473f882001-02-23 17:55:21 +000014091 xmlParseDocument(ctxt);
14092
William M. Brackc07329e2003-09-08 01:57:30 +000014093 if ((ctxt->wellFormed) || recovery) {
14094 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014095 if (ret != NULL) {
14096 if (ctxt->input->buf->compressed > 0)
14097 ret->compression = 9;
14098 else
14099 ret->compression = ctxt->input->buf->compressed;
14100 }
William M. Brackc07329e2003-09-08 01:57:30 +000014101 }
Owen Taylor3473f882001-02-23 17:55:21 +000014102 else {
14103 ret = NULL;
14104 xmlFreeDoc(ctxt->myDoc);
14105 ctxt->myDoc = NULL;
14106 }
14107 if (sax != NULL)
14108 ctxt->sax = NULL;
14109 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014110
Owen Taylor3473f882001-02-23 17:55:21 +000014111 return(ret);
14112}
14113
14114/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014115 * xmlSAXParseFile:
14116 * @sax: the SAX handler block
14117 * @filename: the filename
14118 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14119 * documents
14120 *
14121 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14122 * compressed document is provided by default if found at compile-time.
14123 * It use the given SAX function block to handle the parsing callback.
14124 * If sax is NULL, fallback to the default DOM tree building routines.
14125 *
14126 * Returns the resulting document tree
14127 */
14128
14129xmlDocPtr
14130xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14131 int recovery) {
14132 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14133}
14134
14135/**
Owen Taylor3473f882001-02-23 17:55:21 +000014136 * xmlRecoverDoc:
14137 * @cur: a pointer to an array of xmlChar
14138 *
14139 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014140 * In the case the document is not Well Formed, a attempt to build a
14141 * tree is tried anyway
14142 *
14143 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014144 */
14145
14146xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014147xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014148 return(xmlSAXParseDoc(NULL, cur, 1));
14149}
14150
14151/**
14152 * xmlParseFile:
14153 * @filename: the filename
14154 *
14155 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14156 * compressed document is provided by default if found at compile-time.
14157 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014158 * Returns the resulting document tree if the file was wellformed,
14159 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014160 */
14161
14162xmlDocPtr
14163xmlParseFile(const char *filename) {
14164 return(xmlSAXParseFile(NULL, filename, 0));
14165}
14166
14167/**
14168 * xmlRecoverFile:
14169 * @filename: the filename
14170 *
14171 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14172 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014173 * In the case the document is not Well Formed, it attempts to build
14174 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014175 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014176 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014177 */
14178
14179xmlDocPtr
14180xmlRecoverFile(const char *filename) {
14181 return(xmlSAXParseFile(NULL, filename, 1));
14182}
14183
14184
14185/**
14186 * xmlSetupParserForBuffer:
14187 * @ctxt: an XML parser context
14188 * @buffer: a xmlChar * buffer
14189 * @filename: a file name
14190 *
14191 * Setup the parser context to parse a new buffer; Clears any prior
14192 * contents from the parser context. The buffer parameter must not be
14193 * NULL, but the filename parameter can be
14194 */
14195void
14196xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14197 const char* filename)
14198{
14199 xmlParserInputPtr input;
14200
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014201 if ((ctxt == NULL) || (buffer == NULL))
14202 return;
14203
Owen Taylor3473f882001-02-23 17:55:21 +000014204 input = xmlNewInputStream(ctxt);
14205 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014206 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014207 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014208 return;
14209 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014210
Owen Taylor3473f882001-02-23 17:55:21 +000014211 xmlClearParserCtxt(ctxt);
14212 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014213 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014214 input->base = buffer;
14215 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014216 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014217 inputPush(ctxt, input);
14218}
14219
14220/**
14221 * xmlSAXUserParseFile:
14222 * @sax: a SAX handler
14223 * @user_data: The user data returned on SAX callbacks
14224 * @filename: a file name
14225 *
14226 * parse an XML file and call the given SAX handler routines.
14227 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014228 *
Owen Taylor3473f882001-02-23 17:55:21 +000014229 * Returns 0 in case of success or a error number otherwise
14230 */
14231int
14232xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14233 const char *filename) {
14234 int ret = 0;
14235 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014236
Owen Taylor3473f882001-02-23 17:55:21 +000014237 ctxt = xmlCreateFileParserCtxt(filename);
14238 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014239 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014240 xmlFree(ctxt->sax);
14241 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014242 xmlDetectSAX2(ctxt);
14243
Owen Taylor3473f882001-02-23 17:55:21 +000014244 if (user_data != NULL)
14245 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014246
Owen Taylor3473f882001-02-23 17:55:21 +000014247 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014248
Owen Taylor3473f882001-02-23 17:55:21 +000014249 if (ctxt->wellFormed)
14250 ret = 0;
14251 else {
14252 if (ctxt->errNo != 0)
14253 ret = ctxt->errNo;
14254 else
14255 ret = -1;
14256 }
14257 if (sax != NULL)
14258 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014259 if (ctxt->myDoc != NULL) {
14260 xmlFreeDoc(ctxt->myDoc);
14261 ctxt->myDoc = NULL;
14262 }
Owen Taylor3473f882001-02-23 17:55:21 +000014263 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014264
Owen Taylor3473f882001-02-23 17:55:21 +000014265 return ret;
14266}
Daniel Veillard81273902003-09-30 00:43:48 +000014267#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014268
14269/************************************************************************
14270 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014271 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014272 * *
14273 ************************************************************************/
14274
14275/**
14276 * xmlCreateMemoryParserCtxt:
14277 * @buffer: a pointer to a char array
14278 * @size: the size of the array
14279 *
14280 * Create a parser context for an XML in-memory document.
14281 *
14282 * Returns the new parser context or NULL
14283 */
14284xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014285xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014286 xmlParserCtxtPtr ctxt;
14287 xmlParserInputPtr input;
14288 xmlParserInputBufferPtr buf;
14289
14290 if (buffer == NULL)
14291 return(NULL);
14292 if (size <= 0)
14293 return(NULL);
14294
14295 ctxt = xmlNewParserCtxt();
14296 if (ctxt == NULL)
14297 return(NULL);
14298
Daniel Veillard53350552003-09-18 13:35:51 +000014299 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014300 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014301 if (buf == NULL) {
14302 xmlFreeParserCtxt(ctxt);
14303 return(NULL);
14304 }
Owen Taylor3473f882001-02-23 17:55:21 +000014305
14306 input = xmlNewInputStream(ctxt);
14307 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014308 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014309 xmlFreeParserCtxt(ctxt);
14310 return(NULL);
14311 }
14312
14313 input->filename = NULL;
14314 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014315 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014316
14317 inputPush(ctxt, input);
14318 return(ctxt);
14319}
14320
Daniel Veillard81273902003-09-30 00:43:48 +000014321#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014322/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014323 * xmlSAXParseMemoryWithData:
14324 * @sax: the SAX handler block
14325 * @buffer: an pointer to a char array
14326 * @size: the size of the array
14327 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14328 * documents
14329 * @data: the userdata
14330 *
14331 * parse an XML in-memory block and use the given SAX function block
14332 * to handle the parsing callback. If sax is NULL, fallback to the default
14333 * DOM tree building routines.
14334 *
14335 * User data (void *) is stored within the parser context in the
14336 * context's _private member, so it is available nearly everywhere in libxml
14337 *
14338 * Returns the resulting document tree
14339 */
14340
14341xmlDocPtr
14342xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14343 int size, int recovery, void *data) {
14344 xmlDocPtr ret;
14345 xmlParserCtxtPtr ctxt;
14346
Daniel Veillardab2a7632009-07-09 08:45:03 +020014347 xmlInitParser();
14348
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014349 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14350 if (ctxt == NULL) return(NULL);
14351 if (sax != NULL) {
14352 if (ctxt->sax != NULL)
14353 xmlFree(ctxt->sax);
14354 ctxt->sax = sax;
14355 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014356 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014357 if (data!=NULL) {
14358 ctxt->_private=data;
14359 }
14360
Daniel Veillardadba5f12003-04-04 16:09:01 +000014361 ctxt->recovery = recovery;
14362
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014363 xmlParseDocument(ctxt);
14364
14365 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14366 else {
14367 ret = NULL;
14368 xmlFreeDoc(ctxt->myDoc);
14369 ctxt->myDoc = NULL;
14370 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014371 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014372 ctxt->sax = NULL;
14373 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014374
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014375 return(ret);
14376}
14377
14378/**
Owen Taylor3473f882001-02-23 17:55:21 +000014379 * xmlSAXParseMemory:
14380 * @sax: the SAX handler block
14381 * @buffer: an pointer to a char array
14382 * @size: the size of the array
14383 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14384 * documents
14385 *
14386 * parse an XML in-memory block and use the given SAX function block
14387 * to handle the parsing callback. If sax is NULL, fallback to the default
14388 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014389 *
Owen Taylor3473f882001-02-23 17:55:21 +000014390 * Returns the resulting document tree
14391 */
14392xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014393xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14394 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014395 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014396}
14397
14398/**
14399 * xmlParseMemory:
14400 * @buffer: an pointer to a char array
14401 * @size: the size of the array
14402 *
14403 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014404 *
Owen Taylor3473f882001-02-23 17:55:21 +000014405 * Returns the resulting document tree
14406 */
14407
Daniel Veillard50822cb2001-07-26 20:05:51 +000014408xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014409 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14410}
14411
14412/**
14413 * xmlRecoverMemory:
14414 * @buffer: an pointer to a char array
14415 * @size: the size of the array
14416 *
14417 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014418 * In the case the document is not Well Formed, an attempt to
14419 * build a tree is tried anyway
14420 *
14421 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014422 */
14423
Daniel Veillard50822cb2001-07-26 20:05:51 +000014424xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014425 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14426}
14427
14428/**
14429 * xmlSAXUserParseMemory:
14430 * @sax: a SAX handler
14431 * @user_data: The user data returned on SAX callbacks
14432 * @buffer: an in-memory XML document input
14433 * @size: the length of the XML document in bytes
14434 *
14435 * A better SAX parsing routine.
14436 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014437 *
Owen Taylor3473f882001-02-23 17:55:21 +000014438 * Returns 0 in case of success or a error number otherwise
14439 */
14440int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014441 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014442 int ret = 0;
14443 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014444
14445 xmlInitParser();
14446
Owen Taylor3473f882001-02-23 17:55:21 +000014447 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14448 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014449 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14450 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014451 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014452 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014453
Daniel Veillard30211a02001-04-26 09:33:18 +000014454 if (user_data != NULL)
14455 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014456
Owen Taylor3473f882001-02-23 17:55:21 +000014457 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014458
Owen Taylor3473f882001-02-23 17:55:21 +000014459 if (ctxt->wellFormed)
14460 ret = 0;
14461 else {
14462 if (ctxt->errNo != 0)
14463 ret = ctxt->errNo;
14464 else
14465 ret = -1;
14466 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014467 if (sax != NULL)
14468 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014469 if (ctxt->myDoc != NULL) {
14470 xmlFreeDoc(ctxt->myDoc);
14471 ctxt->myDoc = NULL;
14472 }
Owen Taylor3473f882001-02-23 17:55:21 +000014473 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014474
Owen Taylor3473f882001-02-23 17:55:21 +000014475 return ret;
14476}
Daniel Veillard81273902003-09-30 00:43:48 +000014477#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014478
14479/**
14480 * xmlCreateDocParserCtxt:
14481 * @cur: a pointer to an array of xmlChar
14482 *
14483 * Creates a parser context for an XML in-memory document.
14484 *
14485 * Returns the new parser context or NULL
14486 */
14487xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014488xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014489 int len;
14490
14491 if (cur == NULL)
14492 return(NULL);
14493 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014494 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014495}
14496
Daniel Veillard81273902003-09-30 00:43:48 +000014497#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014498/**
14499 * xmlSAXParseDoc:
14500 * @sax: the SAX handler block
14501 * @cur: a pointer to an array of xmlChar
14502 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14503 * documents
14504 *
14505 * parse an XML in-memory document and build a tree.
14506 * It use the given SAX function block to handle the parsing callback.
14507 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014508 *
Owen Taylor3473f882001-02-23 17:55:21 +000014509 * Returns the resulting document tree
14510 */
14511
14512xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014513xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014514 xmlDocPtr ret;
14515 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014516 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014517
Daniel Veillard38936062004-11-04 17:45:11 +000014518 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014519
14520
14521 ctxt = xmlCreateDocParserCtxt(cur);
14522 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014523 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014524 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014525 ctxt->sax = sax;
14526 ctxt->userData = NULL;
14527 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014528 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014529
14530 xmlParseDocument(ctxt);
14531 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14532 else {
14533 ret = NULL;
14534 xmlFreeDoc(ctxt->myDoc);
14535 ctxt->myDoc = NULL;
14536 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014537 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014538 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014539 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014540
Owen Taylor3473f882001-02-23 17:55:21 +000014541 return(ret);
14542}
14543
14544/**
14545 * xmlParseDoc:
14546 * @cur: a pointer to an array of xmlChar
14547 *
14548 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014549 *
Owen Taylor3473f882001-02-23 17:55:21 +000014550 * Returns the resulting document tree
14551 */
14552
14553xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014554xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014555 return(xmlSAXParseDoc(NULL, cur, 0));
14556}
Daniel Veillard81273902003-09-30 00:43:48 +000014557#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014558
Daniel Veillard81273902003-09-30 00:43:48 +000014559#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014560/************************************************************************
14561 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014562 * Specific function to keep track of entities references *
14563 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014564 * *
14565 ************************************************************************/
14566
14567static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14568
14569/**
14570 * xmlAddEntityReference:
14571 * @ent : A valid entity
14572 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014573 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014574 *
14575 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14576 */
14577static void
14578xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14579 xmlNodePtr lastNode)
14580{
14581 if (xmlEntityRefFunc != NULL) {
14582 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14583 }
14584}
14585
14586
14587/**
14588 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014589 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014590 *
14591 * Set the function to call call back when a xml reference has been made
14592 */
14593void
14594xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14595{
14596 xmlEntityRefFunc = func;
14597}
Daniel Veillard81273902003-09-30 00:43:48 +000014598#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014599
14600/************************************************************************
14601 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014602 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014603 * *
14604 ************************************************************************/
14605
14606#ifdef LIBXML_XPATH_ENABLED
14607#include <libxml/xpath.h>
14608#endif
14609
Daniel Veillardffa3c742005-07-21 13:24:09 +000014610extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014611static int xmlParserInitialized = 0;
14612
14613/**
14614 * xmlInitParser:
14615 *
14616 * Initialization function for the XML parser.
14617 * This is not reentrant. Call once before processing in case of
14618 * use in multithreaded programs.
14619 */
14620
14621void
14622xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014623 if (xmlParserInitialized != 0)
14624 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014625
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014626#ifdef LIBXML_THREAD_ENABLED
14627 __xmlGlobalInitMutexLock();
14628 if (xmlParserInitialized == 0) {
14629#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014630 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014631 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014632 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14633 (xmlGenericError == NULL))
14634 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014635 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014636 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014637 xmlInitCharEncodingHandlers();
14638 xmlDefaultSAXHandlerInit();
14639 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014640#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014641 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014642#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014643#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014644 htmlInitAutoClose();
14645 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014646#endif
14647#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014648 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014649#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014650 xmlParserInitialized = 1;
14651#ifdef LIBXML_THREAD_ENABLED
14652 }
14653 __xmlGlobalInitMutexUnlock();
14654#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014655}
14656
14657/**
14658 * xmlCleanupParser:
14659 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014660 * This function name is somewhat misleading. It does not clean up
14661 * parser state, it cleans up memory allocated by the library itself.
14662 * It is a cleanup function for the XML library. It tries to reclaim all
14663 * related global memory allocated for the library processing.
14664 * It doesn't deallocate any document related memory. One should
14665 * call xmlCleanupParser() only when the process has finished using
14666 * the library and all XML/HTML documents built with it.
14667 * See also xmlInitParser() which has the opposite function of preparing
14668 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014669 *
14670 * WARNING: if your application is multithreaded or has plugin support
14671 * calling this may crash the application if another thread or
14672 * a plugin is still using libxml2. It's sometimes very hard to
14673 * guess if libxml2 is in use in the application, some libraries
14674 * or plugins may use it without notice. In case of doubt abstain
14675 * from calling this function or do it just before calling exit()
14676 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014677 */
14678
14679void
14680xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014681 if (!xmlParserInitialized)
14682 return;
14683
Owen Taylor3473f882001-02-23 17:55:21 +000014684 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014685#ifdef LIBXML_CATALOG_ENABLED
14686 xmlCatalogCleanup();
14687#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014688 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014689 xmlCleanupInputCallbacks();
14690#ifdef LIBXML_OUTPUT_ENABLED
14691 xmlCleanupOutputCallbacks();
14692#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014693#ifdef LIBXML_SCHEMAS_ENABLED
14694 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014695 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014696#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014697 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014698 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014699 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014700 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014701 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014702}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014703
14704/************************************************************************
14705 * *
14706 * New set (2.6.0) of simpler and more flexible APIs *
14707 * *
14708 ************************************************************************/
14709
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded. The expansion is a single statement, so it is safe in an
 * unbraced if/else; callers supply the terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14721
14722/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014723 * xmlCtxtReset:
14724 * @ctxt: an XML parser context
14725 *
14726 * Reset a parser context
14727 */
14728void
14729xmlCtxtReset(xmlParserCtxtPtr ctxt)
14730{
14731 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014732 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014733
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014734 if (ctxt == NULL)
14735 return;
14736
14737 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014738
14739 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14740 xmlFreeInputStream(input);
14741 }
14742 ctxt->inputNr = 0;
14743 ctxt->input = NULL;
14744
14745 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014746 if (ctxt->spaceTab != NULL) {
14747 ctxt->spaceTab[0] = -1;
14748 ctxt->space = &ctxt->spaceTab[0];
14749 } else {
14750 ctxt->space = NULL;
14751 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014752
14753
14754 ctxt->nodeNr = 0;
14755 ctxt->node = NULL;
14756
14757 ctxt->nameNr = 0;
14758 ctxt->name = NULL;
14759
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014760 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014761 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014762 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014763 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014764 DICT_FREE(ctxt->directory);
14765 ctxt->directory = NULL;
14766 DICT_FREE(ctxt->extSubURI);
14767 ctxt->extSubURI = NULL;
14768 DICT_FREE(ctxt->extSubSystem);
14769 ctxt->extSubSystem = NULL;
14770 if (ctxt->myDoc != NULL)
14771 xmlFreeDoc(ctxt->myDoc);
14772 ctxt->myDoc = NULL;
14773
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014774 ctxt->standalone = -1;
14775 ctxt->hasExternalSubset = 0;
14776 ctxt->hasPErefs = 0;
14777 ctxt->html = 0;
14778 ctxt->external = 0;
14779 ctxt->instate = XML_PARSER_START;
14780 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014781
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014782 ctxt->wellFormed = 1;
14783 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014784 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014785 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014786#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014787 ctxt->vctxt.userData = ctxt;
14788 ctxt->vctxt.error = xmlParserValidityError;
14789 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014790#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014791 ctxt->record_info = 0;
14792 ctxt->nbChars = 0;
14793 ctxt->checkIndex = 0;
14794 ctxt->inSubset = 0;
14795 ctxt->errNo = XML_ERR_OK;
14796 ctxt->depth = 0;
14797 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14798 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014799 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014800 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014801 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014802 xmlInitNodeInfoSeq(&ctxt->node_seq);
14803
14804 if (ctxt->attsDefault != NULL) {
14805 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14806 ctxt->attsDefault = NULL;
14807 }
14808 if (ctxt->attsSpecial != NULL) {
14809 xmlHashFree(ctxt->attsSpecial, NULL);
14810 ctxt->attsSpecial = NULL;
14811 }
14812
Daniel Veillard4432df22003-09-28 18:58:27 +000014813#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014814 if (ctxt->catalogs != NULL)
14815 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014816#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014817 if (ctxt->lastError.code != XML_ERR_OK)
14818 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014819}
14820
14821/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014822 * xmlCtxtResetPush:
14823 * @ctxt: an XML parser context
14824 * @chunk: a pointer to an array of chars
14825 * @size: number of chars in the array
14826 * @filename: an optional file name or URI
14827 * @encoding: the document encoding, or NULL
14828 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014829 * Reset a push parser context
14830 *
14831 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014832 */
14833int
14834xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14835 int size, const char *filename, const char *encoding)
14836{
14837 xmlParserInputPtr inputStream;
14838 xmlParserInputBufferPtr buf;
14839 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14840
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014841 if (ctxt == NULL)
14842 return(1);
14843
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014844 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14845 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14846
14847 buf = xmlAllocParserInputBuffer(enc);
14848 if (buf == NULL)
14849 return(1);
14850
14851 if (ctxt == NULL) {
14852 xmlFreeParserInputBuffer(buf);
14853 return(1);
14854 }
14855
14856 xmlCtxtReset(ctxt);
14857
14858 if (ctxt->pushTab == NULL) {
14859 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14860 sizeof(xmlChar *));
14861 if (ctxt->pushTab == NULL) {
14862 xmlErrMemory(ctxt, NULL);
14863 xmlFreeParserInputBuffer(buf);
14864 return(1);
14865 }
14866 }
14867
14868 if (filename == NULL) {
14869 ctxt->directory = NULL;
14870 } else {
14871 ctxt->directory = xmlParserGetDirectory(filename);
14872 }
14873
14874 inputStream = xmlNewInputStream(ctxt);
14875 if (inputStream == NULL) {
14876 xmlFreeParserInputBuffer(buf);
14877 return(1);
14878 }
14879
14880 if (filename == NULL)
14881 inputStream->filename = NULL;
14882 else
14883 inputStream->filename = (char *)
14884 xmlCanonicPath((const xmlChar *) filename);
14885 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014886 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014887
14888 inputPush(ctxt, inputStream);
14889
14890 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14891 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014892 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14893 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014894
14895 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14896
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014897 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014898#ifdef DEBUG_PUSH
14899 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14900#endif
14901 }
14902
14903 if (encoding != NULL) {
14904 xmlCharEncodingHandlerPtr hdlr;
14905
Daniel Veillard37334572008-07-31 08:20:02 +000014906 if (ctxt->encoding != NULL)
14907 xmlFree((xmlChar *) ctxt->encoding);
14908 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14909
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014910 hdlr = xmlFindCharEncodingHandler(encoding);
14911 if (hdlr != NULL) {
14912 xmlSwitchToEncoding(ctxt, hdlr);
14913 } else {
14914 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14915 "Unsupported encoding %s\n", BAD_CAST encoding);
14916 }
14917 } else if (enc != XML_CHAR_ENCODING_NONE) {
14918 xmlSwitchEncoding(ctxt, enc);
14919 }
14920
14921 return(0);
14922}
14923
Daniel Veillard37334572008-07-31 08:20:02 +000014924
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014925/**
Daniel Veillard37334572008-07-31 08:20:02 +000014926 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014927 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014928 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014929 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014930 *
14931 * Applies the options to the parser context
14932 *
14933 * Returns 0 in case of success, the set of unknown or unimplemented options
14934 * in case of error.
14935 */
Daniel Veillard37334572008-07-31 08:20:02 +000014936static int
14937xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014938{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014939 if (ctxt == NULL)
14940 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014941 if (encoding != NULL) {
14942 if (ctxt->encoding != NULL)
14943 xmlFree((xmlChar *) ctxt->encoding);
14944 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14945 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014946 if (options & XML_PARSE_RECOVER) {
14947 ctxt->recovery = 1;
14948 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014949 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014950 } else
14951 ctxt->recovery = 0;
14952 if (options & XML_PARSE_DTDLOAD) {
14953 ctxt->loadsubset = XML_DETECT_IDS;
14954 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014955 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014956 } else
14957 ctxt->loadsubset = 0;
14958 if (options & XML_PARSE_DTDATTR) {
14959 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14960 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014961 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014962 }
14963 if (options & XML_PARSE_NOENT) {
14964 ctxt->replaceEntities = 1;
14965 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14966 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014967 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014968 } else
14969 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014970 if (options & XML_PARSE_PEDANTIC) {
14971 ctxt->pedantic = 1;
14972 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014973 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014974 } else
14975 ctxt->pedantic = 0;
14976 if (options & XML_PARSE_NOBLANKS) {
14977 ctxt->keepBlanks = 0;
14978 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14979 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014980 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014981 } else
14982 ctxt->keepBlanks = 1;
14983 if (options & XML_PARSE_DTDVALID) {
14984 ctxt->validate = 1;
14985 if (options & XML_PARSE_NOWARNING)
14986 ctxt->vctxt.warning = NULL;
14987 if (options & XML_PARSE_NOERROR)
14988 ctxt->vctxt.error = NULL;
14989 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014990 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014991 } else
14992 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014993 if (options & XML_PARSE_NOWARNING) {
14994 ctxt->sax->warning = NULL;
14995 options -= XML_PARSE_NOWARNING;
14996 }
14997 if (options & XML_PARSE_NOERROR) {
14998 ctxt->sax->error = NULL;
14999 ctxt->sax->fatalError = NULL;
15000 options -= XML_PARSE_NOERROR;
15001 }
Daniel Veillard81273902003-09-30 00:43:48 +000015002#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015003 if (options & XML_PARSE_SAX1) {
15004 ctxt->sax->startElement = xmlSAX2StartElement;
15005 ctxt->sax->endElement = xmlSAX2EndElement;
15006 ctxt->sax->startElementNs = NULL;
15007 ctxt->sax->endElementNs = NULL;
15008 ctxt->sax->initialized = 1;
15009 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015010 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015011 }
Daniel Veillard81273902003-09-30 00:43:48 +000015012#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015013 if (options & XML_PARSE_NODICT) {
15014 ctxt->dictNames = 0;
15015 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015016 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015017 } else {
15018 ctxt->dictNames = 1;
15019 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015020 if (options & XML_PARSE_NOCDATA) {
15021 ctxt->sax->cdataBlock = NULL;
15022 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015023 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015024 }
15025 if (options & XML_PARSE_NSCLEAN) {
15026 ctxt->options |= XML_PARSE_NSCLEAN;
15027 options -= XML_PARSE_NSCLEAN;
15028 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015029 if (options & XML_PARSE_NONET) {
15030 ctxt->options |= XML_PARSE_NONET;
15031 options -= XML_PARSE_NONET;
15032 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015033 if (options & XML_PARSE_COMPACT) {
15034 ctxt->options |= XML_PARSE_COMPACT;
15035 options -= XML_PARSE_COMPACT;
15036 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015037 if (options & XML_PARSE_OLD10) {
15038 ctxt->options |= XML_PARSE_OLD10;
15039 options -= XML_PARSE_OLD10;
15040 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015041 if (options & XML_PARSE_NOBASEFIX) {
15042 ctxt->options |= XML_PARSE_NOBASEFIX;
15043 options -= XML_PARSE_NOBASEFIX;
15044 }
15045 if (options & XML_PARSE_HUGE) {
15046 ctxt->options |= XML_PARSE_HUGE;
15047 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015048 if (ctxt->dict != NULL)
15049 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015050 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015051 if (options & XML_PARSE_OLDSAX) {
15052 ctxt->options |= XML_PARSE_OLDSAX;
15053 options -= XML_PARSE_OLDSAX;
15054 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015055 if (options & XML_PARSE_IGNORE_ENC) {
15056 ctxt->options |= XML_PARSE_IGNORE_ENC;
15057 options -= XML_PARSE_IGNORE_ENC;
15058 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015059 if (options & XML_PARSE_BIG_LINES) {
15060 ctxt->options |= XML_PARSE_BIG_LINES;
15061 options -= XML_PARSE_BIG_LINES;
15062 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015063 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015064 return (options);
15065}
15066
15067/**
Daniel Veillard37334572008-07-31 08:20:02 +000015068 * xmlCtxtUseOptions:
15069 * @ctxt: an XML parser context
15070 * @options: a combination of xmlParserOption
15071 *
15072 * Applies the options to the parser context
15073 *
15074 * Returns 0 in case of success, the set of unknown or unimplemented options
15075 * in case of error.
15076 */
15077int
15078xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15079{
15080 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15081}
15082
15083/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015084 * xmlDoRead:
15085 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015086 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015087 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015088 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015089 * @reuse: keep the context for reuse
15090 *
15091 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015092 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015093 * Returns the resulting document tree or NULL
15094 */
15095static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015096xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15097 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015098{
15099 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015100
15101 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015102 if (encoding != NULL) {
15103 xmlCharEncodingHandlerPtr hdlr;
15104
15105 hdlr = xmlFindCharEncodingHandler(encoding);
15106 if (hdlr != NULL)
15107 xmlSwitchToEncoding(ctxt, hdlr);
15108 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015109 if ((URL != NULL) && (ctxt->input != NULL) &&
15110 (ctxt->input->filename == NULL))
15111 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015112 xmlParseDocument(ctxt);
15113 if ((ctxt->wellFormed) || ctxt->recovery)
15114 ret = ctxt->myDoc;
15115 else {
15116 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015117 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015118 xmlFreeDoc(ctxt->myDoc);
15119 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015120 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015121 ctxt->myDoc = NULL;
15122 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015123 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015124 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015125
15126 return (ret);
15127}
15128
15129/**
15130 * xmlReadDoc:
15131 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015132 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015133 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015134 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015135 *
15136 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015137 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015138 * Returns the resulting document tree
15139 */
15140xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015141xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015142{
15143 xmlParserCtxtPtr ctxt;
15144
15145 if (cur == NULL)
15146 return (NULL);
15147
15148 ctxt = xmlCreateDocParserCtxt(cur);
15149 if (ctxt == NULL)
15150 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015151 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015152}
15153
15154/**
15155 * xmlReadFile:
15156 * @filename: a file or URL
15157 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015158 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015159 *
15160 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015161 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015162 * Returns the resulting document tree
15163 */
15164xmlDocPtr
15165xmlReadFile(const char *filename, const char *encoding, int options)
15166{
15167 xmlParserCtxtPtr ctxt;
15168
Daniel Veillard61b93382003-11-03 14:28:31 +000015169 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015170 if (ctxt == NULL)
15171 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015172 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015173}
15174
15175/**
15176 * xmlReadMemory:
15177 * @buffer: a pointer to a char array
15178 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015179 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015180 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015181 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015182 *
15183 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015184 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015185 * Returns the resulting document tree
15186 */
15187xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015188xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015189{
15190 xmlParserCtxtPtr ctxt;
15191
15192 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15193 if (ctxt == NULL)
15194 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015195 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015196}
15197
15198/**
15199 * xmlReadFd:
15200 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015201 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015202 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015203 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015204 *
15205 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015206 * NOTE that the file descriptor will not be closed when the
15207 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015208 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015209 * Returns the resulting document tree
15210 */
15211xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015212xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015213{
15214 xmlParserCtxtPtr ctxt;
15215 xmlParserInputBufferPtr input;
15216 xmlParserInputPtr stream;
15217
15218 if (fd < 0)
15219 return (NULL);
15220
15221 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15222 if (input == NULL)
15223 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015224 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015225 ctxt = xmlNewParserCtxt();
15226 if (ctxt == NULL) {
15227 xmlFreeParserInputBuffer(input);
15228 return (NULL);
15229 }
15230 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15231 if (stream == NULL) {
15232 xmlFreeParserInputBuffer(input);
15233 xmlFreeParserCtxt(ctxt);
15234 return (NULL);
15235 }
15236 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015237 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015238}
15239
15240/**
15241 * xmlReadIO:
15242 * @ioread: an I/O read function
15243 * @ioclose: an I/O close function
15244 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015245 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015246 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015247 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015248 *
15249 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015250 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015251 * Returns the resulting document tree
15252 */
15253xmlDocPtr
15254xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015255 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015256{
15257 xmlParserCtxtPtr ctxt;
15258 xmlParserInputBufferPtr input;
15259 xmlParserInputPtr stream;
15260
15261 if (ioread == NULL)
15262 return (NULL);
15263
15264 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15265 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015266 if (input == NULL) {
15267 if (ioclose != NULL)
15268 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015269 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015270 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015271 ctxt = xmlNewParserCtxt();
15272 if (ctxt == NULL) {
15273 xmlFreeParserInputBuffer(input);
15274 return (NULL);
15275 }
15276 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15277 if (stream == NULL) {
15278 xmlFreeParserInputBuffer(input);
15279 xmlFreeParserCtxt(ctxt);
15280 return (NULL);
15281 }
15282 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015283 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015284}
15285
15286/**
15287 * xmlCtxtReadDoc:
15288 * @ctxt: an XML parser context
15289 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015290 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015292 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015293 *
15294 * parse an XML in-memory document and build a tree.
15295 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015296 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015297 * Returns the resulting document tree
15298 */
15299xmlDocPtr
15300xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015301 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015302{
15303 xmlParserInputPtr stream;
15304
15305 if (cur == NULL)
15306 return (NULL);
15307 if (ctxt == NULL)
15308 return (NULL);
15309
15310 xmlCtxtReset(ctxt);
15311
15312 stream = xmlNewStringInputStream(ctxt, cur);
15313 if (stream == NULL) {
15314 return (NULL);
15315 }
15316 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015317 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015318}
15319
15320/**
15321 * xmlCtxtReadFile:
15322 * @ctxt: an XML parser context
15323 * @filename: a file or URL
15324 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015325 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015326 *
15327 * parse an XML file from the filesystem or the network.
15328 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015329 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015330 * Returns the resulting document tree
15331 */
15332xmlDocPtr
15333xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15334 const char *encoding, int options)
15335{
15336 xmlParserInputPtr stream;
15337
15338 if (filename == NULL)
15339 return (NULL);
15340 if (ctxt == NULL)
15341 return (NULL);
15342
15343 xmlCtxtReset(ctxt);
15344
Daniel Veillard29614c72004-11-26 10:47:26 +000015345 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015346 if (stream == NULL) {
15347 return (NULL);
15348 }
15349 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015350 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015351}
15352
15353/**
15354 * xmlCtxtReadMemory:
15355 * @ctxt: an XML parser context
15356 * @buffer: a pointer to a char array
15357 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015358 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015359 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015360 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015361 *
15362 * parse an XML in-memory document and build a tree.
15363 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015364 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015365 * Returns the resulting document tree
15366 */
15367xmlDocPtr
15368xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015369 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015370{
15371 xmlParserInputBufferPtr input;
15372 xmlParserInputPtr stream;
15373
15374 if (ctxt == NULL)
15375 return (NULL);
15376 if (buffer == NULL)
15377 return (NULL);
15378
15379 xmlCtxtReset(ctxt);
15380
15381 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15382 if (input == NULL) {
15383 return(NULL);
15384 }
15385
15386 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15387 if (stream == NULL) {
15388 xmlFreeParserInputBuffer(input);
15389 return(NULL);
15390 }
15391
15392 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015393 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015394}
15395
15396/**
15397 * xmlCtxtReadFd:
15398 * @ctxt: an XML parser context
15399 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015400 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015401 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015402 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015403 *
15404 * parse an XML from a file descriptor and build a tree.
15405 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015406 * NOTE that the file descriptor will not be closed when the
15407 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015408 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015409 * Returns the resulting document tree
15410 */
15411xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015412xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15413 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015414{
15415 xmlParserInputBufferPtr input;
15416 xmlParserInputPtr stream;
15417
15418 if (fd < 0)
15419 return (NULL);
15420 if (ctxt == NULL)
15421 return (NULL);
15422
15423 xmlCtxtReset(ctxt);
15424
15425
15426 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15427 if (input == NULL)
15428 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015429 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015430 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15431 if (stream == NULL) {
15432 xmlFreeParserInputBuffer(input);
15433 return (NULL);
15434 }
15435 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015436 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015437}
15438
15439/**
15440 * xmlCtxtReadIO:
15441 * @ctxt: an XML parser context
15442 * @ioread: an I/O read function
15443 * @ioclose: an I/O close function
15444 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015445 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015446 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015447 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015448 *
15449 * parse an XML document from I/O functions and source and build a tree.
15450 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015451 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015452 * Returns the resulting document tree
15453 */
15454xmlDocPtr
15455xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15456 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015457 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015458 const char *encoding, int options)
15459{
15460 xmlParserInputBufferPtr input;
15461 xmlParserInputPtr stream;
15462
15463 if (ioread == NULL)
15464 return (NULL);
15465 if (ctxt == NULL)
15466 return (NULL);
15467
15468 xmlCtxtReset(ctxt);
15469
15470 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15471 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015472 if (input == NULL) {
15473 if (ioclose != NULL)
15474 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015475 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015476 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015477 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15478 if (stream == NULL) {
15479 xmlFreeParserInputBuffer(input);
15480 return (NULL);
15481 }
15482 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015483 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015484}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015485
15486#define bottom_parser
15487#include "elfgcchack.h"