blob: 4739add7545b1edcfb69a94ea9291d13bf06d944 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
/************************************************************************
 *                                                                      *
 *      Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
 *                                                                      *
 ************************************************************************/

/*
 * XML_PARSER_BIG_ENTITY: entity sizes below this value are accepted by
 * xmlParserEntityCheck() without any further ratio check.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800125 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
135 return(0);
136
137 /*
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
141 */
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
145 }
146 consumed += ctxt->sizeentities;
147
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
149 return(0);
150 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000151 /*
152 * Do the check based on the replacement size of the entity
153 */
154 if (size < XML_PARSER_BIG_ENTITY)
155 return(0);
156
157 /*
158 * A limit on the amount of text data reasonably used
159 */
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
163 }
164 consumed += ctxt->sizeentities;
165
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168 return (0);
169 } else if (ent != NULL) {
170 /*
171 * use the number of parsed entities in the replacement
172 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800173 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000174
175 /*
176 * The amount of data parsed counting entities size only once
177 */
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
181 }
182 consumed += ctxt->sizeentities;
183
184 /*
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
187 */
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189 return (0);
190 } else {
191 /*
192 * strange we got no data for checking just return
193 */
194 return (0);
195 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 return (1);
198}
199
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to be
 * processed. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard0fb18932003-09-07 09:14:37 +0000210
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
227
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL-terminated and read-only, hence const on both the
 * strings and the array slots.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
237
Daniel Veillarda07050d2003-10-19 14:46:32 +0000238
Owen Taylor3473f882001-02-23 17:55:21 +0000239/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200240static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000242
Daniel Veillard7d515752003-09-26 19:12:37 +0000243static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000244xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000246 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000247 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000248
Daniel Veillard37334572008-07-31 08:20:02 +0000249static int
250xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000252#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000253static void
254xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000256#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000257
Daniel Veillard7d515752003-09-26 19:12:37 +0000258static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000259xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000261
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000262static int
263xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264
Daniel Veillarde57ec792003-09-10 10:50:59 +0000265/************************************************************************
266 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800267 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 * *
269 ************************************************************************/
270
271/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
276 *
277 * Handle a redefinition of attribute error
278 */
279static void
280xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
282{
Daniel Veillard157fee02003-10-31 10:36:03 +0000283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
285 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000286 if (ctxt != NULL)
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000289 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000294 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000300 if (ctxt != NULL) {
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
304 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305}
306
307/**
308 * xmlFatalErr:
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @extra: extra information string
312 *
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314 */
315static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317{
318 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800319 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320
Daniel Veillard157fee02003-10-31 10:36:03 +0000321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
323 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 switch (error) {
325 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800326 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800329 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800332 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "internal error";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800338 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800341 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800344 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800350 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800353 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800356 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "Fragment not allowed";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
441 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000492 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800495 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 if (info == NULL)
507 snprintf(errstr, 128, "%s\n", errmsg);
508 else
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000510 if (ctxt != NULL)
511 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000514 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL) {
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
519 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520}
521
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000522/**
523 * xmlFatalErrMsg:
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
527 *
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529 */
530static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000533{
Daniel Veillard157fee02003-10-31 10:36:03 +0000534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
536 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000537 if (ctxt != NULL)
538 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000546}
547
548/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000549 * xmlWarningMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 * @str2: extra data
555 *
556 * Handle a warning.
557 */
558static void
559xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000563
Daniel Veillard157fee02003-10-31 10:36:03 +0000564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000569 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200570 if (ctxt != NULL) {
571 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000572 (ctxt->sax) ? ctxt->sax->warning : NULL,
573 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200578 } else {
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
584 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585}
586
587/**
588 * xmlValidityError:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
592 * @str1: extra data
593 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000594 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595 */
596static void
597xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000598 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000599{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000600 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000601
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->errNo = error;
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
609 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200610 if (ctxt != NULL) {
611 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000612 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200618 } else {
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000624 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625}
626
627/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000628 * xmlFatalErrMsgInt:
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
633 *
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635 */
636static void
637xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000638 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000639{
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if (ctxt != NULL)
644 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000645 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000648 if (ctxt != NULL) {
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
652 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000653}
654
655/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: an string info
661 * @val: an integer value
662 * @str2: an string info
663 *
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 */
666static void
667xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800668 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000669 const xmlChar *str2)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000680 if (ctxt != NULL) {
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
684 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000685}
686
687/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000688 * xmlFatalErrMsgStr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
693 *
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 */
696static void
697xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000698 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000699{
Daniel Veillard157fee02003-10-31 10:36:03 +0000700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
702 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
713 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000714}
715
716/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000717 * xmlErrMsgStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
722 *
723 * Handle a non fatal parser error
724 */
725static void
726xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
728{
Daniel Veillard157fee02003-10-31 10:36:03 +0000729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
731 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000732 if (ctxt != NULL)
733 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737 val);
738}
739
740/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000741 * xmlNsErr:
742 * @ctxt: an XML parser context
743 * @error: the error number
744 * @msg: the message
745 * @info1: extra information string
746 * @info2: extra information string
747 *
748 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749 */
750static void
751xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000755{
Daniel Veillard157fee02003-10-31 10:36:03 +0000756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
758 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000759 if (ctxt != NULL)
760 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000765 if (ctxt != NULL)
766 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000767}
768
Daniel Veillard37334572008-07-31 08:20:02 +0000769/**
770 * xmlNsWarn
771 * @ctxt: an XML parser context
772 * @error: the error number
773 * @msg: the message
774 * @info1: extra information string
775 * @info2: extra information string
776 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800777 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000778 */
779static void
780xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const char *msg,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
784{
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
787 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
792}
793
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000794/************************************************************************
795 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800796 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797 * *
798 ************************************************************************/
799
800/**
801 * xmlHasFeature:
802 * @feature: the feature to be examined
803 *
804 * Examines if the library has been compiled with a given feature.
805 *
806 * Returns a non-zero value if the feature exist, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an unknown
808 * unknown feature is requested, non-zero otherwise.
809 */
810int
811xmlHasFeature(xmlFeature feature)
812{
813 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_THREAD_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_TREE_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_OUTPUT_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_PUSH_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_READER_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_PATTERN_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_WRITER_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_SAX1_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_FTP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_HTTP_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_VALID_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_HTML_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_LEGACY_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_C14N_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_CATALOG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_XPATH_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_XPTR_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_XINCLUDE_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_ICONV_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_ISO8859X_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_UNICODE_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_REGEXP_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_AUTOMATA_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_EXPR_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_SCHEMAS_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_SCHEMATRON_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_MODULES_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_DEBUG_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef DEBUG_MEMORY_LOCATION
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_DEBUG_RUNTIME
990 return(1);
991#else
992 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000993#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000994 case XML_WITH_ZLIB:
995#ifdef LIBXML_ZLIB_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001000 case XML_WITH_LZMA:
1001#ifdef LIBXML_LZMA_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 case XML_WITH_ICU:
1007#ifdef LIBXML_ICU_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012 default:
1013 break;
1014 }
1015 return(0);
1016}
1017
1018/************************************************************************
1019 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001020 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 * *
1022 ************************************************************************/
1023
1024/**
1025 * xmlDetectSAX2:
1026 * @ctxt: an XML parser context
1027 *
1028 * Do the SAX2 detection and specific intialization
1029 */
1030static void
1031xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001037#else
1038 ctxt->sax2 = 1;
1039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001046 xmlErrMemory(ctxt, NULL);
1047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048}
1049
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050typedef struct _xmlDefAttrs xmlDefAttrs;
1051typedef xmlDefAttrs *xmlDefAttrsPtr;
1052struct _xmlDefAttrs {
1053 int nbAttrs; /* number of defaulted attributes on that element */
1054 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001055 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057
1058/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001059 * xmlAttrNormalizeSpace:
1060 * @src: the source string
1061 * @dst: the target string
1062 *
1063 * Normalize the space in non CDATA attribute values:
1064 * If the attribute type is not CDATA, then the XML processor MUST further
1065 * process the normalized attribute value by discarding any leading and
1066 * trailing space (#x20) characters, and by replacing sequences of space
1067 * (#x20) characters by a single space (#x20) character.
1068 * Note that the size of dst need to be at least src, and if one doesn't need
1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070 * passing src as dst is just fine.
1071 *
1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073 * is needed.
1074 */
1075static xmlChar *
1076xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077{
1078 if ((src == NULL) || (dst == NULL))
1079 return(NULL);
1080
1081 while (*src == 0x20) src++;
1082 while (*src != 0) {
1083 if (*src == 0x20) {
1084 while (*src == 0x20) src++;
1085 if (*src != 0)
1086 *dst++ = 0x20;
1087 } else {
1088 *dst++ = *src++;
1089 }
1090 }
1091 *dst = 0;
1092 if (dst == src)
1093 return(NULL);
1094 return(dst);
1095}
1096
1097/**
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1100 *
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1104 *
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106 * is needed.
1107 */
1108static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001109xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001110{
1111 int i;
1112 int remove_head = 0;
1113 int need_realloc = 0;
1114 const xmlChar *cur;
1115
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117 return(NULL);
1118 i = *len;
1119 if (i <= 0)
1120 return(NULL);
1121
1122 cur = src;
1123 while (*cur == 0x20) {
1124 cur++;
1125 remove_head++;
1126 }
1127 while (*cur != 0) {
1128 if (*cur == 0x20) {
1129 cur++;
1130 if ((*cur == 0x20) || (*cur == 0)) {
1131 need_realloc = 1;
1132 break;
1133 }
1134 } else
1135 cur++;
1136 }
1137 if (need_realloc) {
1138 xmlChar *ret;
1139
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141 if (ret == NULL) {
1142 xmlErrMemory(ctxt, NULL);
1143 return(NULL);
1144 }
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1147 return(ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 memmove(src, src + remove_head, 1 + *len);
1151 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001152 }
1153 return(NULL);
1154}
1155
1156/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 * xmlAddDefAttrs:
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1162 *
1163 * Add a defaulted attribute for an element
1164 */
1165static void
1166xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1171 int len;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1174
Daniel Veillard6a31b832008-03-26 14:06:44 +00001175 /*
1176 * Allows to detect attribute redefinitions
1177 */
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180 return;
1181 }
1182
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 if (ctxt->attsDefault == NULL)
1186 goto mem_error;
1187 }
1188
1189 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 */
1193 name = xmlSplitQName3(fullname, &len);
1194 if (name == NULL) {
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1196 prefix = NULL;
1197 } else {
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200 }
1201
1202 /*
1203 * make sure there is some storage
1204 */
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 if (defaults == NULL)
1210 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001212 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1215 xmlFree(defaults);
1216 goto mem_error;
1217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001219 xmlDefAttrsPtr temp;
1220
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001223 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1229 xmlFree(defaults);
1230 goto mem_error;
1231 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 }
1233
1234 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001235 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001236 * are within the DTD and hen not associated to namespace names.
1237 */
1238 name = xmlSplitQName3(fullattr, &len);
1239 if (name == NULL) {
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241 prefix = NULL;
1242 } else {
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245 }
1246
Daniel Veillardae0765b2008-07-31 19:54:59 +00001247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254 if (ctxt->external)
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256 else
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 defaults->nbAttrs++;
1259
1260 return;
1261
1262mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001263 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 return;
1265}
1266
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001267/**
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1273 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001275 */
1276static void
1277xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1280 int type)
1281{
1282 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001284 if (ctxt->attsSpecial == NULL)
1285 goto mem_error;
1286 }
1287
Daniel Veillardac4118d2008-01-11 05:27:32 +00001288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289 return;
1290
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001293 return;
1294
1295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001297 return;
1298}
1299
Daniel Veillard4432df22003-09-28 18:58:27 +00001300/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001301 * xmlCleanSpecialAttrCallback:
1302 *
1303 * Removes CDATA attributes from the special attribute table
1304 */
1305static void
1306xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313 }
1314}
1315
1316/**
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1319 *
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1323 */
1324static void
1325xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326{
1327 if (ctxt->attsSpecial == NULL)
1328 return;
1329
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1335 }
1336 return;
1337}
1338
1339/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1342 *
1343 * Checks that the value conforms to the LanguageID production:
1344 *
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1347 *
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1354 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1356 *
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1359 * ["-" script]
1360 * ["-" region]
1361 * *("-" variant)
1362 * *("-" extension)
1363 * ["-" privateuse]
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1369 *
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1372 *
1373 * script = 4ALPHA ; ISO 15924 code
1374 *
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1377 *
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1380 *
1381 * extension = singleton 1*("-" (2*8alphanum))
1382 *
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1386 * / %x41-57 ; A - W
1387 * / %x59-5A ; Y - Z
1388 * / %x61-77 ; a - w
1389 * / %x79-7A ; y - z
1390 *
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1394 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001395 * Returns 1 if correct 0 otherwise
1396 **/
1397int
1398xmlCheckLanguageID(const xmlChar * lang)
1399{
Daniel Veillard60587d62010-11-04 15:16:27 +01001400 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001401
1402 if (cur == NULL)
1403 return (0);
1404 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001405 ((cur[0] == 'I') && (cur[1] == '-')) ||
1406 ((cur[0] == 'x') && (cur[1] == '-')) ||
1407 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001408 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001409 * Still allow IANA code and user code which were coming
1410 * from the previous version of the XML-1.0 specification
1411 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001412 */
1413 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001415 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001417 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001418 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001419 nxt = cur;
1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422 nxt++;
1423 if (nxt - cur >= 4) {
1424 /*
1425 * Reserved
1426 */
1427 if ((nxt - cur > 8) || (nxt[0] != 0))
1428 return(0);
1429 return(1);
1430 }
1431 if (nxt - cur < 2)
1432 return(0);
1433 /* we got an ISO 639 code */
1434 if (nxt[0] == 0)
1435 return(1);
1436 if (nxt[0] != '-')
1437 return(0);
1438
1439 nxt++;
1440 cur = nxt;
1441 /* now we can have extlang or script or region or variant */
1442 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443 goto region_m49;
1444
1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447 nxt++;
1448 if (nxt - cur == 4)
1449 goto script;
1450 if (nxt - cur == 2)
1451 goto region;
1452 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453 goto variant;
1454 if (nxt - cur != 3)
1455 return(0);
1456 /* we parsed an extlang */
1457 if (nxt[0] == 0)
1458 return(1);
1459 if (nxt[0] != '-')
1460 return(0);
1461
1462 nxt++;
1463 cur = nxt;
1464 /* now we can have script or region or variant */
1465 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466 goto region_m49;
1467
1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470 nxt++;
1471 if (nxt - cur == 2)
1472 goto region;
1473 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474 goto variant;
1475 if (nxt - cur != 4)
1476 return(0);
1477 /* we parsed a script */
1478script:
1479 if (nxt[0] == 0)
1480 return(1);
1481 if (nxt[0] != '-')
1482 return(0);
1483
1484 nxt++;
1485 cur = nxt;
1486 /* now we can have region or variant */
1487 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488 goto region_m49;
1489
1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492 nxt++;
1493
1494 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495 goto variant;
1496 if (nxt - cur != 2)
1497 return(0);
1498 /* we parsed a region */
1499region:
1500 if (nxt[0] == 0)
1501 return(1);
1502 if (nxt[0] != '-')
1503 return(0);
1504
1505 nxt++;
1506 cur = nxt;
1507 /* now we can just have a variant */
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511
1512 if ((nxt - cur < 5) || (nxt - cur > 8))
1513 return(0);
1514
1515 /* we parsed a variant */
1516variant:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001522 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001523
1524region_m49:
1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527 nxt += 3;
1528 goto region;
1529 }
1530 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001531}
1532
Owen Taylor3473f882001-02-23 17:55:21 +00001533/************************************************************************
1534 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001535 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001536 * *
1537 ************************************************************************/
1538
Daniel Veillard8ed10722009-08-20 19:17:36 +02001539static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001541
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542#ifdef SAX2
1543/**
1544 * nsPush:
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1548 *
1549 * Pushes a new parser namespace on top of the ns stack
1550 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 */
1554static int
1555nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1558 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001560 if (ctxt->nsTab[i] == prefix) {
1561 /* in scope */
1562 if (ctxt->nsTab[i + 1] == URL)
1563 return(-2);
1564 /* out of scope keep it */
1565 break;
1566 }
1567 }
1568 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570 ctxt->nsMax = 10;
1571 ctxt->nsNr = 0;
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001576 ctxt->nsMax = 0;
1577 return (-1);
1578 }
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001580 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001586 ctxt->nsMax /= 2;
1587 return (-1);
1588 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001589 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 }
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1594}
1595/**
1596 * nsPop:
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1599 *
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1601 *
1602 * Returns the number of namespaces removed
1603 */
1604static int
1605nsPop(xmlParserCtxtPtr ctxt, int nr)
1606{
1607 int i;
1608
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612 nr = ctxt->nsNr;
1613 }
1614 if (ctxt->nsNr <= 0)
1615 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001616
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 for (i = 0;i < nr;i++) {
1618 ctxt->nsNr--;
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1620 }
1621 return(nr);
1622}
1623#endif
1624
1625static int
1626xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001628 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629 int maxatts;
1630
1631 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001645 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001646 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001651 ctxt->maxatts = maxatts;
1652 }
1653 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657}
1658
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659/**
1660 * inputPush:
1661 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001662 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 *
1664 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001666 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001667 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001668int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001671 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001672 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 if (ctxt->inputNr >= ctxt->inputMax) {
1674 ctxt->inputMax *= 2;
1675 ctxt->inputTab =
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677 ctxt->inputMax *
1678 sizeof(ctxt->inputTab[0]));
1679 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
1683 value = NULL;
1684 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001685 }
1686 }
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1690}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001693 * @ctxt: an XML parser context
1694 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001696 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001699xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700inputPop(xmlParserCtxtPtr ctxt)
1701{
1702 xmlParserInputPtr ret;
1703
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001704 if (ctxt == NULL)
1705 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001707 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 ctxt->inputNr--;
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711 else
1712 ctxt->input = NULL;
1713 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001714 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 return (ret);
1716}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001717/**
1718 * nodePush:
1719 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001720 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 *
1722 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001723 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001724 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001729 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001731 xmlNodePtr *tmp;
1732
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001736 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001738 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001739 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001740 ctxt->nodeTab = tmp;
1741 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001742 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001747 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001750 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001751 ctxt->nodeTab[ctxt->nodeNr] = value;
1752 ctxt->node = value;
1753 return (ctxt->nodeNr++);
1754}
Daniel Veillard8915c152008-08-26 13:05:34 +00001755
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756/**
1757 * nodePop:
1758 * @ctxt: an XML parser context
1759 *
1760 * Pops the top element node from the node stack
1761 *
1762 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001763 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001764xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765nodePop(xmlParserCtxtPtr ctxt)
1766{
1767 xmlNodePtr ret;
1768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001771 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 ctxt->nodeNr--;
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775 else
1776 ctxt->node = NULL;
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 return (ret);
1780}
Daniel Veillarda2351322004-06-27 12:08:10 +00001781
1782#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 * nameNsPush:
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1789 *
1790 * Pushes a new element name/prefix/URL on top of the name stack
1791 *
1792 * Returns -1 in case of error, the index in the stack otherwise
1793 */
1794static int
1795nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797{
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1800 void **tmp2;
1801 ctxt->nameMax *= 2;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803 ctxt->nameMax *
1804 sizeof(ctxt->nameTab[0]));
1805 if (tmp == NULL) {
1806 ctxt->nameMax /= 2;
1807 goto mem_error;
1808 }
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811 ctxt->nameMax * 3 *
1812 sizeof(ctxt->pushTab[0]));
1813 if (tmp2 == NULL) {
1814 ctxt->nameMax /= 2;
1815 goto mem_error;
1816 }
1817 ctxt->pushTab = tmp2;
1818 }
1819 ctxt->nameTab[ctxt->nameNr] = value;
1820 ctxt->name = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 return (ctxt->nameNr++);
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 return (-1);
1828}
1829/**
1830 * nameNsPop:
1831 * @ctxt: an XML parser context
1832 *
1833 * Pops the top element/prefix/URI name from the name stack
1834 *
1835 * Returns the name just removed
1836 */
1837static const xmlChar *
1838nameNsPop(xmlParserCtxtPtr ctxt)
1839{
1840 const xmlChar *ret;
1841
1842 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001843 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844 ctxt->nameNr--;
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847 else
1848 ctxt->name = NULL;
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1851 return (ret);
1852}
Daniel Veillarda2351322004-06-27 12:08:10 +00001853#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854
1855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001856 * namePush:
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1859 *
1860 * Pushes a new element name on top of the name stack
1861 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001865namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001867 if (ctxt == NULL) return (-1);
1868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001872 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001873 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001878 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001879 }
1880 ctxt->nameTab[ctxt->nameNr] = value;
1881 ctxt->name = value;
1882 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001884 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886}
1887/**
1888 * namePop:
1889 * @ctxt: an XML parser context
1890 *
1891 * Pops the top element name from the name stack
1892 *
1893 * Returns the name just removed
1894 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001895const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896namePop(xmlParserCtxtPtr ctxt)
1897{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001898 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001899
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 ctxt->nameNr--;
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905 else
1906 ctxt->name = NULL;
1907 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001908 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 return (ret);
1910}
Owen Taylor3473f882001-02-23 17:55:21 +00001911
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001912static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001914 int *tmp;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001921 ctxt->spaceMax /=2;
1922 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001924 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1929}
1930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001931static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 int ret;
1933 if (ctxt->spaceNr <= 0) return(0);
1934 ctxt->spaceNr--;
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001938 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1941 return(ret);
1942}
1943
1944/*
1945 * Macros for accessing the content. Those should be used only by the parser,
1946 * and not exported.
1947 *
1948 * Dirty macros, i.e. one often need to make assumption on the context to
1949 * use them
1950 *
1951 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1952 * To be used with extreme caution since operations consuming
1953 * characters may move the input buffer to a different location !
1954 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1955 * This should be used internally by the parser
1956 * only to compare to ASCII values otherwise it would break when
1957 * running with UTF-8 encoding.
1958 * RAW same as CUR but in the input buffer, bypass any token
1959 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChars, and must also be used only to skip ASCII
 *           defined strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1
 *           non-newline ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001966 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1967 *
1968 * NEXT Skip to the next character, this does the proper decoding
1969 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001970 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001971 * CUR_CHAR(l) returns the current unicode character (int), set l
1972 * to the number of xmlChars used for the encoding [0-5].
1973 * CUR_SCHAR same but operate on a string instead of the context
1974 * COPY_BUF copy the current unicode char to the target buffer, increment
1975 * the index
1976 * GROW, SHRINK handling of input buffers
1977 */
1978
/*
 * Raw accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1 .. cn): true when the first n bytes at s equal c1 .. cn.
 * Every argument is parenthesized so that expressions such as "p + 1"
 * are cast and indexed as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance by val xmlChars that contain no newline, then
 * handle a PE reference start or an exhausted input at the new position.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the xmlChars one at a time so that
 * line and column stay correct when newlines are skipped.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
Daniel Veillarda880b122003-04-21 21:36:41 +00002025#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002028 xmlSHRINK (ctxt);
2029
2030static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 }
Owen Taylor3473f882001-02-23 17:55:21 +00002036
Daniel Veillarda880b122003-04-21 21:36:41 +00002037#define GROW if ((ctxt->progressive == 0) && \
2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002039 xmlGROW (ctxt);
2040
2041static void xmlGROW (xmlParserCtxtPtr ctxt) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002042 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2043 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002044 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002045 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2046 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002047 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002048 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002049 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002050 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002051 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2052 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002053}
Owen Taylor3473f882001-02-23 17:55:21 +00002054
/* Skip blanks at the current position; evaluates to the count skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar that is not a newline, reading
 * more input if the cursor lands on a 0.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance over the current character, which is l xmlChars
 * long, keeping line and column up to date.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context / of string s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v, whose encoded length is l, to
 * buffer b at index i and advance i. The unbraced if/else shape is kept
 * for existing call sites; do not use it as the body of an outer if
 * that has its own else.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2082/**
2083 * xmlSkipBlankChars:
2084 * @ctxt: the XML parser context
2085 *
2086 * skip all blanks character found at that point in the input streams.
2087 * It pops up finished entities in the process if allowable at that point.
2088 *
2089 * Returns the number of space chars skipped
2090 */
2091
2092int
2093xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002094 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002095
2096 /*
2097 * It's Okay to use CUR/NEXT here since all the blanks are on
2098 * the ASCII range.
2099 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002100 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2101 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002102 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002103 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002104 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002105 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002106 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002107 if (*cur == '\n') {
2108 ctxt->input->line++; ctxt->input->col = 1;
2109 }
2110 cur++;
2111 res++;
2112 if (*cur == 0) {
2113 ctxt->input->cur = cur;
2114 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2115 cur = ctxt->input->cur;
2116 }
2117 }
2118 ctxt->input->cur = cur;
2119 } else {
2120 int cur;
2121 do {
2122 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002123 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002124 NEXT;
2125 cur = CUR;
2126 res++;
2127 }
2128 while ((cur == 0) && (ctxt->inputNr > 1) &&
2129 (ctxt->instate != XML_PARSER_COMMENT)) {
2130 xmlPopInput(ctxt);
2131 cur = CUR;
2132 }
2133 /*
2134 * Need to handle support of entities branching here
2135 */
2136 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2137 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2138 }
Owen Taylor3473f882001-02-23 17:55:21 +00002139 return(res);
2140}
2141
2142/************************************************************************
2143 * *
2144 * Commodity functions to handle entities *
2145 * *
2146 ************************************************************************/
2147
2148/**
2149 * xmlPopInput:
2150 * @ctxt: an XML parser context
2151 *
2152 * xmlPopInput: the current input pointed by ctxt->input came to an end
2153 * pop it and return the next char.
2154 *
2155 * Returns the current xmlChar in the parser context
2156 */
2157xmlChar
2158xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002159 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 if (xmlParserDebugEntities)
2161 xmlGenericError(xmlGenericErrorContext,
2162 "Popping input %d\n", ctxt->inputNr);
2163 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002164 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002165 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2166 return(xmlPopInput(ctxt));
2167 return(CUR);
2168}
2169
2170/**
2171 * xmlPushInput:
2172 * @ctxt: an XML parser context
2173 * @input: an XML parser input fragment (entity, XML fragment ...).
2174 *
2175 * xmlPushInput: switch to a new input stream which is stacked on top
2176 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002177 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002178 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002179int
Owen Taylor3473f882001-02-23 17:55:21 +00002180xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002181 int ret;
2182 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002183
2184 if (xmlParserDebugEntities) {
2185 if ((ctxt->input != NULL) && (ctxt->input->filename))
2186 xmlGenericError(xmlGenericErrorContext,
2187 "%s(%d): ", ctxt->input->filename,
2188 ctxt->input->line);
2189 xmlGenericError(xmlGenericErrorContext,
2190 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2191 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002192 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002193 if (ctxt->instate == XML_PARSER_EOF)
2194 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002195 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002196 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002197}
2198
2199/**
2200 * xmlParseCharRef:
2201 * @ctxt: an XML parser context
2202 *
2203 * parse Reference declarations
2204 *
2205 * [66] CharRef ::= '&#' [0-9]+ ';' |
2206 * '&#x' [0-9a-fA-F]+ ';'
2207 *
2208 * [ WFC: Legal Character ]
2209 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002210 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002211 *
2212 * Returns the value parsed (as an int), 0 in case of error
2213 */
2214int
2215xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002216 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002217 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002218 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002219
Owen Taylor3473f882001-02-23 17:55:21 +00002220 /*
2221 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2222 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002223 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002224 (NXT(2) == 'x')) {
2225 SKIP(3);
2226 GROW;
2227 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002228 if (count++ > 20) {
2229 count = 0;
2230 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002231 if (ctxt->instate == XML_PARSER_EOF)
2232 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002233 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002234 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002235 val = val * 16 + (CUR - '0');
2236 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2237 val = val * 16 + (CUR - 'a') + 10;
2238 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2239 val = val * 16 + (CUR - 'A') + 10;
2240 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002241 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002242 val = 0;
2243 break;
2244 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002245 if (val > 0x10FFFF)
2246 outofrange = val;
2247
Owen Taylor3473f882001-02-23 17:55:21 +00002248 NEXT;
2249 count++;
2250 }
2251 if (RAW == ';') {
2252 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002253 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002254 ctxt->nbChars ++;
2255 ctxt->input->cur++;
2256 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002257 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002258 SKIP(2);
2259 GROW;
2260 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002261 if (count++ > 20) {
2262 count = 0;
2263 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002264 if (ctxt->instate == XML_PARSER_EOF)
2265 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002266 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002267 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002268 val = val * 10 + (CUR - '0');
2269 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002271 val = 0;
2272 break;
2273 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002274 if (val > 0x10FFFF)
2275 outofrange = val;
2276
Owen Taylor3473f882001-02-23 17:55:21 +00002277 NEXT;
2278 count++;
2279 }
2280 if (RAW == ';') {
2281 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002282 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002283 ctxt->nbChars ++;
2284 ctxt->input->cur++;
2285 }
2286 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002287 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002288 }
2289
2290 /*
2291 * [ WFC: Legal Character ]
2292 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002293 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002294 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002295 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002296 return(val);
2297 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002298 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2299 "xmlParseCharRef: invalid xmlChar value %d\n",
2300 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002301 }
2302 return(0);
2303}
2304
2305/**
2306 * xmlParseStringCharRef:
2307 * @ctxt: an XML parser context
2308 * @str: a pointer to an index in the string
2309 *
2310 * parse Reference declarations, variant parsing from a string rather
2311 * than an an input flow.
2312 *
2313 * [66] CharRef ::= '&#' [0-9]+ ';' |
2314 * '&#x' [0-9a-fA-F]+ ';'
2315 *
2316 * [ WFC: Legal Character ]
2317 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002318 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002319 *
2320 * Returns the value parsed (as an int), 0 in case of error, str will be
2321 * updated to the current value of the index
2322 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002323static int
Owen Taylor3473f882001-02-23 17:55:21 +00002324xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2325 const xmlChar *ptr;
2326 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002327 unsigned int val = 0;
2328 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002329
2330 if ((str == NULL) || (*str == NULL)) return(0);
2331 ptr = *str;
2332 cur = *ptr;
2333 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2334 ptr += 3;
2335 cur = *ptr;
2336 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002337 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002338 val = val * 16 + (cur - '0');
2339 else if ((cur >= 'a') && (cur <= 'f'))
2340 val = val * 16 + (cur - 'a') + 10;
2341 else if ((cur >= 'A') && (cur <= 'F'))
2342 val = val * 16 + (cur - 'A') + 10;
2343 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002344 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002345 val = 0;
2346 break;
2347 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002348 if (val > 0x10FFFF)
2349 outofrange = val;
2350
Owen Taylor3473f882001-02-23 17:55:21 +00002351 ptr++;
2352 cur = *ptr;
2353 }
2354 if (cur == ';')
2355 ptr++;
2356 } else if ((cur == '&') && (ptr[1] == '#')){
2357 ptr += 2;
2358 cur = *ptr;
2359 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002360 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002361 val = val * 10 + (cur - '0');
2362 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002363 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002364 val = 0;
2365 break;
2366 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002367 if (val > 0x10FFFF)
2368 outofrange = val;
2369
Owen Taylor3473f882001-02-23 17:55:21 +00002370 ptr++;
2371 cur = *ptr;
2372 }
2373 if (cur == ';')
2374 ptr++;
2375 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002376 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002377 return(0);
2378 }
2379 *str = ptr;
2380
2381 /*
2382 * [ WFC: Legal Character ]
2383 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002384 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002385 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002386 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002387 return(val);
2388 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002389 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2391 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002392 }
2393 return(0);
2394}
2395
2396/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002397 * xmlNewBlanksWrapperInputStream:
2398 * @ctxt: an XML parser context
2399 * @entity: an Entity pointer
2400 *
2401 * Create a new input stream for wrapping
2402 * blanks around a PEReference
2403 *
2404 * Returns the new input stream or NULL
2405 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002406
Daniel Veillardf5582f12002-06-11 10:08:16 +00002407static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002408
Daniel Veillardf4862f02002-09-10 11:13:43 +00002409static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002410xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2411 xmlParserInputPtr input;
2412 xmlChar *buffer;
2413 size_t length;
2414 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002415 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2416 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002417 return(NULL);
2418 }
2419 if (xmlParserDebugEntities)
2420 xmlGenericError(xmlGenericErrorContext,
2421 "new blanks wrapper for entity: %s\n", entity->name);
2422 input = xmlNewInputStream(ctxt);
2423 if (input == NULL) {
2424 return(NULL);
2425 }
2426 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002427 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002428 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002429 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002430 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002431 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002432 }
2433 buffer [0] = ' ';
2434 buffer [1] = '%';
2435 buffer [length-3] = ';';
2436 buffer [length-2] = ' ';
2437 buffer [length-1] = 0;
2438 memcpy(buffer + 2, entity->name, length - 5);
2439 input->free = deallocblankswrapper;
2440 input->base = buffer;
2441 input->cur = buffer;
2442 input->length = length;
2443 input->end = &buffer[length];
2444 return(input);
2445}
2446
2447/**
Owen Taylor3473f882001-02-23 17:55:21 +00002448 * xmlParserHandlePEReference:
2449 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002450 *
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * [69] PEReference ::= '%' Name ';'
2452 *
2453 * [ WFC: No Recursion ]
2454 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002455 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002456 *
2457 * [ WFC: Entity Declared ]
2458 * In a document without any DTD, a document with only an internal DTD
2459 * subset which contains no parameter entity references, or a document
2460 * with "standalone='yes'", ... ... The declaration of a parameter
2461 * entity must precede any reference to it...
2462 *
2463 * [ VC: Entity Declared ]
2464 * In a document with an external subset or external parameter entities
2465 * with "standalone='no'", ... ... The declaration of a parameter entity
2466 * must precede any reference to it...
2467 *
2468 * [ WFC: In DTD ]
2469 * Parameter-entity references may only appear in the DTD.
2470 * NOTE: misleading but this is handled.
2471 *
2472 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002473 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002474 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002475 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002476 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002477 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002478 */
2479void
2480xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002481 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002482 xmlEntityPtr entity = NULL;
2483 xmlParserInputPtr input;
2484
Owen Taylor3473f882001-02-23 17:55:21 +00002485 if (RAW != '%') return;
2486 switch(ctxt->instate) {
2487 case XML_PARSER_CDATA_SECTION:
2488 return;
2489 case XML_PARSER_COMMENT:
2490 return;
2491 case XML_PARSER_START_TAG:
2492 return;
2493 case XML_PARSER_END_TAG:
2494 return;
2495 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002496 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 return;
2498 case XML_PARSER_PROLOG:
2499 case XML_PARSER_START:
2500 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002501 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002502 return;
2503 case XML_PARSER_ENTITY_DECL:
2504 case XML_PARSER_CONTENT:
2505 case XML_PARSER_ATTRIBUTE_VALUE:
2506 case XML_PARSER_PI:
2507 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002508 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002509 /* we just ignore it there */
2510 return;
2511 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002512 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002513 return;
2514 case XML_PARSER_ENTITY_VALUE:
2515 /*
2516 * NOTE: in the case of entity values, we don't do the
2517 * substitution here since we need the literal
2518 * entity value to be able to save the internal
2519 * subset of the document.
2520 * This will be handled by xmlStringDecodeEntities
2521 */
2522 return;
2523 case XML_PARSER_DTD:
2524 /*
2525 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2526 * In the internal DTD subset, parameter-entity references
2527 * can occur only where markup declarations can occur, not
2528 * within markup declarations.
2529 * In that case this is handled in xmlParseMarkupDecl
2530 */
2531 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2532 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002533 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002534 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002535 break;
2536 case XML_PARSER_IGNORE:
2537 return;
2538 }
2539
2540 NEXT;
2541 name = xmlParseName(ctxt);
2542 if (xmlParserDebugEntities)
2543 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002544 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002545 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002546 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002547 } else {
2548 if (RAW == ';') {
2549 NEXT;
2550 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2551 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002552 if (ctxt->instate == XML_PARSER_EOF)
2553 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002554 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002555
Owen Taylor3473f882001-02-23 17:55:21 +00002556 /*
2557 * [ WFC: Entity Declared ]
2558 * In a document without any DTD, a document with only an
2559 * internal DTD subset which contains no parameter entity
2560 * references, or a document with "standalone='yes'", ...
2561 * ... The declaration of a parameter entity must precede
2562 * any reference to it...
2563 */
2564 if ((ctxt->standalone == 1) ||
2565 ((ctxt->hasExternalSubset == 0) &&
2566 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002567 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002568 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002569 } else {
2570 /*
2571 * [ VC: Entity Declared ]
2572 * In a document with an external subset or external
2573 * parameter entities with "standalone='no'", ...
2574 * ... The declaration of a parameter entity must precede
2575 * any reference to it...
2576 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002577 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2578 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2579 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002580 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002581 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002582 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2583 "PEReference: %%%s; not found\n",
2584 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002585 ctxt->valid = 0;
2586 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002587 } else if (ctxt->input->free != deallocblankswrapper) {
2588 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002589 if (xmlPushInput(ctxt, input) < 0)
2590 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002591 } else {
2592 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2593 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002594 xmlChar start[4];
2595 xmlCharEncoding enc;
2596
Owen Taylor3473f882001-02-23 17:55:21 +00002597 /*
2598 * handle the extra spaces added before and after
2599 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002600 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002601 */
2602 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002603 if (xmlPushInput(ctxt, input) < 0)
2604 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002605
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002606 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002607 * Get the 4 first bytes and decode the charset
2608 * if enc != XML_CHAR_ENCODING_NONE
2609 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002610 * Note that, since we may have some non-UTF8
2611 * encoding (like UTF16, bug 135229), the 'length'
2612 * is not known, but we can calculate based upon
2613 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002614 */
2615 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002616 if (ctxt->instate == XML_PARSER_EOF)
2617 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002618 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002619 start[0] = RAW;
2620 start[1] = NXT(1);
2621 start[2] = NXT(2);
2622 start[3] = NXT(3);
2623 enc = xmlDetectCharEncoding(start, 4);
2624 if (enc != XML_CHAR_ENCODING_NONE) {
2625 xmlSwitchEncoding(ctxt, enc);
2626 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002627 }
2628
Owen Taylor3473f882001-02-23 17:55:21 +00002629 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002630 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2631 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002632 xmlParseTextDecl(ctxt);
2633 }
Owen Taylor3473f882001-02-23 17:55:21 +00002634 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002635 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2636 "PEReference: %s is not a parameter entity\n",
2637 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002638 }
2639 }
2640 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002641 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002642 }
Owen Taylor3473f882001-02-23 17:55:21 +00002643 }
2644}
2645
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus @n. @n is parenthesized so
 * that any expression can be passed. On failure (size wrap-around or
 * allocation error) the buffer and its size are left untouched and control
 * jumps to the mem_error label of the calling function.
 *
 * NOTE: this expands to a brace block, not a single statement, so it must
 * not be used as the unbraced body of an if which has an else branch.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2660
2661/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002662 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002663 * @ctxt: the parser context
2664 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002665 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002666 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2667 * @end: an end marker xmlChar, 0 if none
2668 * @end2: an end marker xmlChar, 0 if none
2669 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002670 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002671 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002672 *
2673 * [67] Reference ::= EntityRef | CharRef
2674 *
2675 * [69] PEReference ::= '%' Name ';'
2676 *
2677 * Returns A newly allocated string with the substitution done. The caller
2678 * must deallocate it !
2679 */
2680xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002681xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2682 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002683 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002684 size_t buffer_size = 0;
2685 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002686
2687 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002688 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002689 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 xmlEntityPtr ent;
2691 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002692
Daniel Veillarda82b1822004-11-08 16:24:57 +00002693 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002694 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002695 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002696
Daniel Veillard0161e632008-08-28 15:36:32 +00002697 if (((ctxt->depth > 40) &&
2698 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2699 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002700 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002701 return(NULL);
2702 }
2703
2704 /*
2705 * allocate a translation buffer.
2706 */
2707 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002708 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002709 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002710
2711 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002712 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002713 * we are operating on already parsed values.
2714 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002715 if (str < last)
2716 c = CUR_SCHAR(str, l);
2717 else
2718 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002719 while ((c != 0) && (c != end) && /* non input consuming loop */
2720 (c != end2) && (c != end3)) {
2721
2722 if (c == 0) break;
2723 if ((c == '&') && (str[1] == '#')) {
2724 int val = xmlParseStringCharRef(ctxt, &str);
2725 if (val != 0) {
2726 COPY_BUF(0,buffer,nbchars,val);
2727 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002728 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002729 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002730 }
Owen Taylor3473f882001-02-23 17:55:21 +00002731 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2732 if (xmlParserDebugEntities)
2733 xmlGenericError(xmlGenericErrorContext,
2734 "String decoding Entity Reference: %.30s\n",
2735 str);
2736 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002737 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2738 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002739 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002740 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002741 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002742 if ((ent != NULL) &&
2743 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2744 if (ent->content != NULL) {
2745 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002746 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002747 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002748 }
Owen Taylor3473f882001-02-23 17:55:21 +00002749 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002750 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2751 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002752 }
2753 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002754 ctxt->depth++;
2755 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2756 0, 0, 0);
2757 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002758
Owen Taylor3473f882001-02-23 17:55:21 +00002759 if (rep != NULL) {
2760 current = rep;
2761 while (*current != 0) { /* non input consuming loop */
2762 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002763 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002764 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002765 goto int_error;
2766 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002767 }
2768 }
2769 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002770 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002771 }
2772 } else if (ent != NULL) {
2773 int i = xmlStrlen(ent->name);
2774 const xmlChar *cur = ent->name;
2775
2776 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002777 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002778 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002779 }
2780 for (;i > 0;i--)
2781 buffer[nbchars++] = *cur++;
2782 buffer[nbchars++] = ';';
2783 }
2784 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2785 if (xmlParserDebugEntities)
2786 xmlGenericError(xmlGenericErrorContext,
2787 "String decoding PE Reference: %.30s\n", str);
2788 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002789 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2790 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002791 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002792 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002793 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002795 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002796 }
Owen Taylor3473f882001-02-23 17:55:21 +00002797 ctxt->depth++;
2798 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2799 0, 0, 0);
2800 ctxt->depth--;
2801 if (rep != NULL) {
2802 current = rep;
2803 while (*current != 0) { /* non input consuming loop */
2804 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002805 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002806 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002807 goto int_error;
2808 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002809 }
2810 }
2811 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002812 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002813 }
2814 }
2815 } else {
2816 COPY_BUF(l,buffer,nbchars,c);
2817 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002818 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2819 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002820 }
2821 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002822 if (str < last)
2823 c = CUR_SCHAR(str, l);
2824 else
2825 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002826 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002827 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002828 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002829
2830mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002831 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002832int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002833 if (rep != NULL)
2834 xmlFree(rep);
2835 if (buffer != NULL)
2836 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002837 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002838}
2839
Daniel Veillarde57ec792003-09-10 10:50:59 +00002840/**
2841 * xmlStringDecodeEntities:
2842 * @ctxt: the parser context
2843 * @str: the input string
2844 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2845 * @end: an end marker xmlChar, 0 if none
2846 * @end2: an end marker xmlChar, 0 if none
2847 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002848 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002849 * Takes a entity string content and process to do the adequate substitutions.
2850 *
2851 * [67] Reference ::= EntityRef | CharRef
2852 *
2853 * [69] PEReference ::= '%' Name ';'
2854 *
2855 * Returns A newly allocated string with the substitution done. The caller
2856 * must deallocate it !
2857 */
2858xmlChar *
2859xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2860 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002861 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002862 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2863 end, end2, end3));
2864}
Owen Taylor3473f882001-02-23 17:55:21 +00002865
2866/************************************************************************
2867 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002868 * Commodity functions, cleanup needed ? *
2869 * *
2870 ************************************************************************/
2871
2872/**
2873 * areBlanks:
2874 * @ctxt: an XML parser context
2875 * @str: a xmlChar *
2876 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002877 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002878 *
2879 * Is this a sequence of blank chars that one can ignore ?
2880 *
2881 * Returns 1 if ignorable 0 otherwise.
2882 */
2883
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002884static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2885 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002886 int i, ret;
2887 xmlNodePtr lastChild;
2888
Daniel Veillard05c13a22001-09-09 08:38:09 +00002889 /*
2890 * Don't spend time trying to differentiate them, the same callback is
2891 * used !
2892 */
2893 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002894 return(0);
2895
Owen Taylor3473f882001-02-23 17:55:21 +00002896 /*
2897 * Check for xml:space value.
2898 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002899 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2900 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002901 return(0);
2902
2903 /*
2904 * Check that the string is made of blanks
2905 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002906 if (blank_chars == 0) {
2907 for (i = 0;i < len;i++)
2908 if (!(IS_BLANK_CH(str[i]))) return(0);
2909 }
Owen Taylor3473f882001-02-23 17:55:21 +00002910
2911 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002912 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002913 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002914 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002915 if (ctxt->myDoc != NULL) {
2916 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2917 if (ret == 0) return(1);
2918 if (ret == 1) return(0);
2919 }
2920
2921 /*
2922 * Otherwise, heuristic :-\
2923 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002924 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002925 if ((ctxt->node->children == NULL) &&
2926 (RAW == '<') && (NXT(1) == '/')) return(0);
2927
2928 lastChild = xmlGetLastChild(ctxt->node);
2929 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002930 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2931 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002932 } else if (xmlNodeIsText(lastChild))
2933 return(0);
2934 else if ((ctxt->node->children != NULL) &&
2935 (xmlNodeIsText(ctxt->node->children)))
2936 return(0);
2937 return(1);
2938}
2939
Owen Taylor3473f882001-02-23 17:55:21 +00002940/************************************************************************
2941 * *
2942 * Extra stuff for namespace support *
2943 * Relates to http://www.w3.org/TR/WD-xml-names *
2944 * *
2945 ************************************************************************/
2946
2947/**
2948 * xmlSplitQName:
2949 * @ctxt: an XML parser context
2950 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002951 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002952 *
2953 * parse an UTF8 encoded XML qualified name string
2954 *
2955 * [NS 5] QName ::= (Prefix ':')? LocalPart
2956 *
2957 * [NS 6] Prefix ::= NCName
2958 *
2959 * [NS 7] LocalPart ::= NCName
2960 *
2961 * Returns the local part, and prefix is updated
2962 * to get the Prefix if any.
2963 */
2964
2965xmlChar *
2966xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2967 xmlChar buf[XML_MAX_NAMELEN + 5];
2968 xmlChar *buffer = NULL;
2969 int len = 0;
2970 int max = XML_MAX_NAMELEN;
2971 xmlChar *ret = NULL;
2972 const xmlChar *cur = name;
2973 int c;
2974
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002975 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002976 *prefix = NULL;
2977
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002978 if (cur == NULL) return(NULL);
2979
Owen Taylor3473f882001-02-23 17:55:21 +00002980#ifndef XML_XML_NAMESPACE
2981 /* xml: prefix is not really a namespace */
2982 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2983 (cur[2] == 'l') && (cur[3] == ':'))
2984 return(xmlStrdup(name));
2985#endif
2986
Daniel Veillard597bc482003-07-24 16:08:28 +00002987 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002988 if (cur[0] == ':')
2989 return(xmlStrdup(name));
2990
2991 c = *cur++;
2992 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2993 buf[len++] = c;
2994 c = *cur++;
2995 }
2996 if (len >= max) {
2997 /*
2998 * Okay someone managed to make a huge name, so he's ready to pay
2999 * for the processing speed.
3000 */
3001 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003002
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003003 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003004 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003005 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003006 return(NULL);
3007 }
3008 memcpy(buffer, buf, len);
3009 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3010 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003011 xmlChar *tmp;
3012
Owen Taylor3473f882001-02-23 17:55:21 +00003013 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003014 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003015 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003016 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003017 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003018 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003019 return(NULL);
3020 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003021 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003022 }
3023 buffer[len++] = c;
3024 c = *cur++;
3025 }
3026 buffer[len] = 0;
3027 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003028
Daniel Veillard597bc482003-07-24 16:08:28 +00003029 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003030 if (buffer != NULL)
3031 xmlFree(buffer);
3032 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003033 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003034 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003035
Owen Taylor3473f882001-02-23 17:55:21 +00003036 if (buffer == NULL)
3037 ret = xmlStrndup(buf, len);
3038 else {
3039 ret = buffer;
3040 buffer = NULL;
3041 max = XML_MAX_NAMELEN;
3042 }
3043
3044
3045 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003046 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003047 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003048 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003049 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003050 }
Owen Taylor3473f882001-02-23 17:55:21 +00003051 len = 0;
3052
Daniel Veillardbb284f42002-10-16 18:02:47 +00003053 /*
3054 * Check that the first character is proper to start
3055 * a new name
3056 */
3057 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3058 ((c >= 0x41) && (c <= 0x5A)) ||
3059 (c == '_') || (c == ':'))) {
3060 int l;
3061 int first = CUR_SCHAR(cur, l);
3062
3063 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003064 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003065 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003066 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003067 }
3068 }
3069 cur++;
3070
Owen Taylor3473f882001-02-23 17:55:21 +00003071 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3072 buf[len++] = c;
3073 c = *cur++;
3074 }
3075 if (len >= max) {
3076 /*
3077 * Okay someone managed to make a huge name, so he's ready to pay
3078 * for the processing speed.
3079 */
3080 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003081
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003082 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003083 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003084 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003085 return(NULL);
3086 }
3087 memcpy(buffer, buf, len);
3088 while (c != 0) { /* tested bigname2.xml */
3089 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003090 xmlChar *tmp;
3091
Owen Taylor3473f882001-02-23 17:55:21 +00003092 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003093 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003094 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003095 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003096 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003097 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003098 return(NULL);
3099 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003100 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003101 }
3102 buffer[len++] = c;
3103 c = *cur++;
3104 }
3105 buffer[len] = 0;
3106 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003107
Owen Taylor3473f882001-02-23 17:55:21 +00003108 if (buffer == NULL)
3109 ret = xmlStrndup(buf, len);
3110 else {
3111 ret = buffer;
3112 }
3113 }
3114
3115 return(ret);
3116}
3117
3118/************************************************************************
3119 * *
3120 * The parser itself *
3121 * Relates to http://www.w3.org/TR/REC-xml *
3122 * *
3123 ************************************************************************/
3124
Daniel Veillard34e3f642008-07-29 09:02:27 +00003125/************************************************************************
3126 * *
3127 * Routines to parse Name, NCName and NmToken *
3128 * *
3129 ************************************************************************/
/*
 * Debug-only call counters: each name parsing routine of this section
 * increments its own counter on entry when the file is built with DEBUG.
 */
#ifdef DEBUG
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3138
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139/*
3140 * The two following functions are related to the change of accepted
3141 * characters for Name and NmToken in the Revision 5 of XML-1.0
3142 * They correspond to the modified production [4] and the new production [4a]
3143 * changes in that revision. Also note that the macros used for the
3144 * productions Letter, Digit, CombiningChar and Extender are not needed
3145 * anymore.
3146 * We still keep compatibility to pre-revision5 parsing semantic if the
3147 * new XML_PARSE_OLD10 option is given to the parser.
3148 */
3149static int
3150xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3151 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3152 /*
3153 * Use the new checks of production [4] [4a] amd [5] of the
3154 * Update 5 of XML-1.0
3155 */
3156 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3157 (((c >= 'a') && (c <= 'z')) ||
3158 ((c >= 'A') && (c <= 'Z')) ||
3159 (c == '_') || (c == ':') ||
3160 ((c >= 0xC0) && (c <= 0xD6)) ||
3161 ((c >= 0xD8) && (c <= 0xF6)) ||
3162 ((c >= 0xF8) && (c <= 0x2FF)) ||
3163 ((c >= 0x370) && (c <= 0x37D)) ||
3164 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3165 ((c >= 0x200C) && (c <= 0x200D)) ||
3166 ((c >= 0x2070) && (c <= 0x218F)) ||
3167 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3168 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3169 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3170 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3171 ((c >= 0x10000) && (c <= 0xEFFFF))))
3172 return(1);
3173 } else {
3174 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3175 return(1);
3176 }
3177 return(0);
3178}
3179
3180static int
3181xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3182 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3183 /*
3184 * Use the new checks of production [4] [4a] amd [5] of the
3185 * Update 5 of XML-1.0
3186 */
3187 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3188 (((c >= 'a') && (c <= 'z')) ||
3189 ((c >= 'A') && (c <= 'Z')) ||
3190 ((c >= '0') && (c <= '9')) || /* !start */
3191 (c == '_') || (c == ':') ||
3192 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3193 ((c >= 0xC0) && (c <= 0xD6)) ||
3194 ((c >= 0xD8) && (c <= 0xF6)) ||
3195 ((c >= 0xF8) && (c <= 0x2FF)) ||
3196 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3197 ((c >= 0x370) && (c <= 0x37D)) ||
3198 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3199 ((c >= 0x200C) && (c <= 0x200D)) ||
3200 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3201 ((c >= 0x2070) && (c <= 0x218F)) ||
3202 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3203 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3204 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3205 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3206 ((c >= 0x10000) && (c <= 0xEFFFF))))
3207 return(1);
3208 } else {
3209 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3210 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003211 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003212 (IS_COMBINING(c)) ||
3213 (IS_EXTENDER(c)))
3214 return(1);
3215 }
3216 return(0);
3217}
3218
Daniel Veillarde57ec792003-09-10 10:50:59 +00003219static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003220 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003221
Daniel Veillard34e3f642008-07-29 09:02:27 +00003222static const xmlChar *
3223xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3224 int len = 0, l;
3225 int c;
3226 int count = 0;
3227
Daniel Veillardc6561462009-03-25 10:22:31 +00003228#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003229 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003230#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003231
3232 /*
3233 * Handler for more complex cases
3234 */
3235 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003236 if (ctxt->instate == XML_PARSER_EOF)
3237 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003238 c = CUR_CHAR(l);
3239 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3240 /*
3241 * Use the new checks of production [4] [4a] amd [5] of the
3242 * Update 5 of XML-1.0
3243 */
3244 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3245 (!(((c >= 'a') && (c <= 'z')) ||
3246 ((c >= 'A') && (c <= 'Z')) ||
3247 (c == '_') || (c == ':') ||
3248 ((c >= 0xC0) && (c <= 0xD6)) ||
3249 ((c >= 0xD8) && (c <= 0xF6)) ||
3250 ((c >= 0xF8) && (c <= 0x2FF)) ||
3251 ((c >= 0x370) && (c <= 0x37D)) ||
3252 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3253 ((c >= 0x200C) && (c <= 0x200D)) ||
3254 ((c >= 0x2070) && (c <= 0x218F)) ||
3255 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3256 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3257 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3258 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3259 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3260 return(NULL);
3261 }
3262 len += l;
3263 NEXTL(l);
3264 c = CUR_CHAR(l);
3265 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3266 (((c >= 'a') && (c <= 'z')) ||
3267 ((c >= 'A') && (c <= 'Z')) ||
3268 ((c >= '0') && (c <= '9')) || /* !start */
3269 (c == '_') || (c == ':') ||
3270 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3271 ((c >= 0xC0) && (c <= 0xD6)) ||
3272 ((c >= 0xD8) && (c <= 0xF6)) ||
3273 ((c >= 0xF8) && (c <= 0x2FF)) ||
3274 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3275 ((c >= 0x370) && (c <= 0x37D)) ||
3276 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3277 ((c >= 0x200C) && (c <= 0x200D)) ||
3278 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3279 ((c >= 0x2070) && (c <= 0x218F)) ||
3280 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3281 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3282 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3283 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3284 ((c >= 0x10000) && (c <= 0xEFFFF))
3285 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003286 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003287 count = 0;
3288 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003289 if (ctxt->instate == XML_PARSER_EOF)
3290 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003291 }
3292 len += l;
3293 NEXTL(l);
3294 c = CUR_CHAR(l);
3295 }
3296 } else {
3297 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3298 (!IS_LETTER(c) && (c != '_') &&
3299 (c != ':'))) {
3300 return(NULL);
3301 }
3302 len += l;
3303 NEXTL(l);
3304 c = CUR_CHAR(l);
3305
3306 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3307 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3308 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003309 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003310 (IS_COMBINING(c)) ||
3311 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003312 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003313 count = 0;
3314 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003315 if (ctxt->instate == XML_PARSER_EOF)
3316 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003317 }
3318 len += l;
3319 NEXTL(l);
3320 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003321 if (c == 0) {
3322 count = 0;
3323 GROW;
3324 if (ctxt->instate == XML_PARSER_EOF)
3325 return(NULL);
3326 c = CUR_CHAR(l);
3327 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003328 }
3329 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003330 if ((len > XML_MAX_NAME_LENGTH) &&
3331 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3332 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3333 return(NULL);
3334 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003335 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3336 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3337 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3338}
3339
Owen Taylor3473f882001-02-23 17:55:21 +00003340/**
3341 * xmlParseName:
3342 * @ctxt: an XML parser context
3343 *
3344 * parse an XML name.
3345 *
3346 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3347 * CombiningChar | Extender
3348 *
3349 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3350 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003351 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003352 *
3353 * Returns the Name parsed or NULL
3354 */
3355
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003356const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003357xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003358 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003359 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003360 int count = 0;
3361
3362 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003363
Daniel Veillardc6561462009-03-25 10:22:31 +00003364#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003365 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003366#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003367
Daniel Veillard48b2f892001-02-25 16:11:03 +00003368 /*
3369 * Accelerator for simple ASCII names
3370 */
3371 in = ctxt->input->cur;
3372 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373 ((*in >= 0x41) && (*in <= 0x5A)) ||
3374 (*in == '_') || (*in == ':')) {
3375 in++;
3376 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377 ((*in >= 0x41) && (*in <= 0x5A)) ||
3378 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003379 (*in == '_') || (*in == '-') ||
3380 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003381 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003382 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003383 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003384 if ((count > XML_MAX_NAME_LENGTH) &&
3385 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3386 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3387 return(NULL);
3388 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003389 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003390 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003391 ctxt->nbChars += count;
3392 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003393 if (ret == NULL)
3394 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003395 return(ret);
3396 }
3397 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003398 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003399 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003400}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003401
Daniel Veillard34e3f642008-07-29 09:02:27 +00003402static const xmlChar *
3403xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3404 int len = 0, l;
3405 int c;
3406 int count = 0;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003407 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
Daniel Veillard34e3f642008-07-29 09:02:27 +00003408
Daniel Veillardc6561462009-03-25 10:22:31 +00003409#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003410 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003411#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003412
3413 /*
3414 * Handler for more complex cases
3415 */
3416 GROW;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003417 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003418 c = CUR_CHAR(l);
3419 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3420 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3421 return(NULL);
3422 }
3423
3424 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3425 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003426 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003427 if ((len > XML_MAX_NAME_LENGTH) &&
3428 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3429 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3430 return(NULL);
3431 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 count = 0;
3433 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003434 if (ctxt->instate == XML_PARSER_EOF)
3435 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003436 }
3437 len += l;
3438 NEXTL(l);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003439 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003440 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003441 if (c == 0) {
3442 count = 0;
3443 GROW;
3444 if (ctxt->instate == XML_PARSER_EOF)
3445 return(NULL);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003446 end = ctxt->input->cur;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003447 c = CUR_CHAR(l);
3448 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003449 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003450 if ((len > XML_MAX_NAME_LENGTH) &&
3451 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3452 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3453 return(NULL);
3454 }
Daniel Veillarddcc19502013-05-22 22:56:45 +02003455 return(xmlDictLookup(ctxt->dict, end - len, len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003456}
3457
3458/**
3459 * xmlParseNCName:
3460 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003461 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003462 *
3463 * parse an XML name.
3464 *
3465 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3466 * CombiningChar | Extender
3467 *
3468 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3469 *
3470 * Returns the Name parsed or NULL
3471 */
3472
3473static const xmlChar *
3474xmlParseNCName(xmlParserCtxtPtr ctxt) {
3475 const xmlChar *in;
3476 const xmlChar *ret;
3477 int count = 0;
3478
Daniel Veillardc6561462009-03-25 10:22:31 +00003479#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003480 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003481#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003482
3483 /*
3484 * Accelerator for simple ASCII names
3485 */
3486 in = ctxt->input->cur;
3487 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3488 ((*in >= 0x41) && (*in <= 0x5A)) ||
3489 (*in == '_')) {
3490 in++;
3491 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3492 ((*in >= 0x41) && (*in <= 0x5A)) ||
3493 ((*in >= 0x30) && (*in <= 0x39)) ||
3494 (*in == '_') || (*in == '-') ||
3495 (*in == '.'))
3496 in++;
3497 if ((*in > 0) && (*in < 0x80)) {
3498 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003499 if ((count > XML_MAX_NAME_LENGTH) &&
3500 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3501 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3502 return(NULL);
3503 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3505 ctxt->input->cur = in;
3506 ctxt->nbChars += count;
3507 ctxt->input->col += count;
3508 if (ret == NULL) {
3509 xmlErrMemory(ctxt, NULL);
3510 }
3511 return(ret);
3512 }
3513 }
3514 return(xmlParseNCNameComplex(ctxt));
3515}
3516
Daniel Veillard46de64e2002-05-29 08:21:33 +00003517/**
3518 * xmlParseNameAndCompare:
3519 * @ctxt: an XML parser context
3520 *
3521 * parse an XML name and compares for match
3522 * (specialized for endtag parsing)
3523 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003524 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3525 * and the name for mismatch
3526 */
3527
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003528static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003529xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003530 register const xmlChar *cmp = other;
3531 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003532 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003533
3534 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003535 if (ctxt->instate == XML_PARSER_EOF)
3536 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003537
Daniel Veillard46de64e2002-05-29 08:21:33 +00003538 in = ctxt->input->cur;
3539 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003540 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003541 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003542 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003543 }
William M. Brack76e95df2003-10-18 16:20:14 +00003544 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003545 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003546 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003547 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003548 }
3549 /* failure (or end of input buffer), check with full function */
3550 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003551 /* strings coming from the dictionnary direct compare possible */
3552 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003553 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003554 }
3555 return ret;
3556}
3557
Owen Taylor3473f882001-02-23 17:55:21 +00003558/**
3559 * xmlParseStringName:
3560 * @ctxt: an XML parser context
3561 * @str: a pointer to the string pointer (IN/OUT)
3562 *
3563 * parse an XML name.
3564 *
3565 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3566 * CombiningChar | Extender
3567 *
3568 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3569 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003570 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003571 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003572 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003573 * is updated to the current location in the string.
3574 */
3575
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003576static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003577xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3578 xmlChar buf[XML_MAX_NAMELEN + 5];
3579 const xmlChar *cur = *str;
3580 int len = 0, l;
3581 int c;
3582
Daniel Veillardc6561462009-03-25 10:22:31 +00003583#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003584 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003585#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003586
Owen Taylor3473f882001-02-23 17:55:21 +00003587 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003588 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003589 return(NULL);
3590 }
3591
Daniel Veillard34e3f642008-07-29 09:02:27 +00003592 COPY_BUF(l,buf,len,c);
3593 cur += l;
3594 c = CUR_SCHAR(cur, l);
3595 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003596 COPY_BUF(l,buf,len,c);
3597 cur += l;
3598 c = CUR_SCHAR(cur, l);
3599 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3600 /*
3601 * Okay someone managed to make a huge name, so he's ready to pay
3602 * for the processing speed.
3603 */
3604 xmlChar *buffer;
3605 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003606
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003607 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003608 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003609 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003610 return(NULL);
3611 }
3612 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003613 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003614 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003615 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003616
3617 if ((len > XML_MAX_NAME_LENGTH) &&
3618 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3619 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3620 xmlFree(buffer);
3621 return(NULL);
3622 }
Owen Taylor3473f882001-02-23 17:55:21 +00003623 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003624 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003625 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003626 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003627 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003628 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003629 return(NULL);
3630 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003631 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003632 }
3633 COPY_BUF(l,buffer,len,c);
3634 cur += l;
3635 c = CUR_SCHAR(cur, l);
3636 }
3637 buffer[len] = 0;
3638 *str = cur;
3639 return(buffer);
3640 }
3641 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003642 if ((len > XML_MAX_NAME_LENGTH) &&
3643 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3644 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3645 return(NULL);
3646 }
Owen Taylor3473f882001-02-23 17:55:21 +00003647 *str = cur;
3648 return(xmlStrndup(buf, len));
3649}
3650
3651/**
3652 * xmlParseNmtoken:
3653 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003654 *
Owen Taylor3473f882001-02-23 17:55:21 +00003655 * parse an XML Nmtoken.
3656 *
3657 * [7] Nmtoken ::= (NameChar)+
3658 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003659 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003660 *
3661 * Returns the Nmtoken parsed or NULL
3662 */
3663
3664xmlChar *
3665xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3666 xmlChar buf[XML_MAX_NAMELEN + 5];
3667 int len = 0, l;
3668 int c;
3669 int count = 0;
3670
Daniel Veillardc6561462009-03-25 10:22:31 +00003671#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003672 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003673#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003674
Owen Taylor3473f882001-02-23 17:55:21 +00003675 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003676 if (ctxt->instate == XML_PARSER_EOF)
3677 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003678 c = CUR_CHAR(l);
3679
Daniel Veillard34e3f642008-07-29 09:02:27 +00003680 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003681 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003682 count = 0;
3683 GROW;
3684 }
3685 COPY_BUF(l,buf,len,c);
3686 NEXTL(l);
3687 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003688 if (c == 0) {
3689 count = 0;
3690 GROW;
3691 if (ctxt->instate == XML_PARSER_EOF)
3692 return(NULL);
3693 c = CUR_CHAR(l);
3694 }
Owen Taylor3473f882001-02-23 17:55:21 +00003695 if (len >= XML_MAX_NAMELEN) {
3696 /*
3697 * Okay someone managed to make a huge token, so he's ready to pay
3698 * for the processing speed.
3699 */
3700 xmlChar *buffer;
3701 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003702
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003703 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003704 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003705 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003706 return(NULL);
3707 }
3708 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003709 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003710 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003711 count = 0;
3712 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003713 if (ctxt->instate == XML_PARSER_EOF) {
3714 xmlFree(buffer);
3715 return(NULL);
3716 }
Owen Taylor3473f882001-02-23 17:55:21 +00003717 }
3718 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003719 xmlChar *tmp;
3720
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003721 if ((max > XML_MAX_NAME_LENGTH) &&
3722 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3723 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3724 xmlFree(buffer);
3725 return(NULL);
3726 }
Owen Taylor3473f882001-02-23 17:55:21 +00003727 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003728 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003729 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003730 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003731 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003732 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003733 return(NULL);
3734 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003735 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003736 }
3737 COPY_BUF(l,buffer,len,c);
3738 NEXTL(l);
3739 c = CUR_CHAR(l);
3740 }
3741 buffer[len] = 0;
3742 return(buffer);
3743 }
3744 }
3745 if (len == 0)
3746 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003747 if ((len > XML_MAX_NAME_LENGTH) &&
3748 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3749 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3750 return(NULL);
3751 }
Owen Taylor3473f882001-02-23 17:55:21 +00003752 return(xmlStrndup(buf, len));
3753}
3754
3755/**
3756 * xmlParseEntityValue:
3757 * @ctxt: an XML parser context
3758 * @orig: if non-NULL store a copy of the original entity value
3759 *
3760 * parse a value for ENTITY declarations
3761 *
3762 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3763 * "'" ([^%&'] | PEReference | Reference)* "'"
3764 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003765 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003766 */
3767
3768xmlChar *
3769xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3770 xmlChar *buf = NULL;
3771 int len = 0;
3772 int size = XML_PARSER_BUFFER_SIZE;
3773 int c, l;
3774 xmlChar stop;
3775 xmlChar *ret = NULL;
3776 const xmlChar *cur = NULL;
3777 xmlParserInputPtr input;
3778
3779 if (RAW == '"') stop = '"';
3780 else if (RAW == '\'') stop = '\'';
3781 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003782 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003783 return(NULL);
3784 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003785 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003786 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003787 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003788 return(NULL);
3789 }
3790
3791 /*
3792 * The content of the entity definition is copied in a buffer.
3793 */
3794
3795 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3796 input = ctxt->input;
3797 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003798 if (ctxt->instate == XML_PARSER_EOF) {
3799 xmlFree(buf);
3800 return(NULL);
3801 }
Owen Taylor3473f882001-02-23 17:55:21 +00003802 NEXT;
3803 c = CUR_CHAR(l);
3804 /*
3805 * NOTE: 4.4.5 Included in Literal
3806 * When a parameter entity reference appears in a literal entity
3807 * value, ... a single or double quote character in the replacement
3808 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003809 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003810 * In practice it means we stop the loop only when back at parsing
3811 * the initial entity and the quote is found
3812 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003813 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3814 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003815 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003816 xmlChar *tmp;
3817
Owen Taylor3473f882001-02-23 17:55:21 +00003818 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003819 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3820 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003821 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003822 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003823 return(NULL);
3824 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003825 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003826 }
3827 COPY_BUF(l,buf,len,c);
3828 NEXTL(l);
3829 /*
3830 * Pop-up of finished entities.
3831 */
3832 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3833 xmlPopInput(ctxt);
3834
3835 GROW;
3836 c = CUR_CHAR(l);
3837 if (c == 0) {
3838 GROW;
3839 c = CUR_CHAR(l);
3840 }
3841 }
3842 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003843 if (ctxt->instate == XML_PARSER_EOF) {
3844 xmlFree(buf);
3845 return(NULL);
3846 }
Owen Taylor3473f882001-02-23 17:55:21 +00003847
3848 /*
3849 * Raise problem w.r.t. '&' and '%' being used in non-entities
3850 * reference constructs. Note Charref will be handled in
3851 * xmlStringDecodeEntities()
3852 */
3853 cur = buf;
3854 while (*cur != 0) { /* non input consuming */
3855 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3856 xmlChar *name;
3857 xmlChar tmp = *cur;
3858
3859 cur++;
3860 name = xmlParseStringName(ctxt, &cur);
3861 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003862 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003863 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003864 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003865 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003866 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3867 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003868 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003869 }
3870 if (name != NULL)
3871 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003872 if (*cur == 0)
3873 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003874 }
3875 cur++;
3876 }
3877
3878 /*
3879 * Then PEReference entities are substituted.
3880 */
3881 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003882 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003883 xmlFree(buf);
3884 } else {
3885 NEXT;
3886 /*
3887 * NOTE: 4.4.7 Bypassed
3888 * When a general entity reference appears in the EntityValue in
3889 * an entity declaration, it is bypassed and left as is.
3890 * so XML_SUBSTITUTE_REF is not set here.
3891 */
3892 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3893 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003894 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003895 *orig = buf;
3896 else
3897 xmlFree(buf);
3898 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003899
Owen Taylor3473f882001-02-23 17:55:21 +00003900 return(ret);
3901}
3902
3903/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003904 * xmlParseAttValueComplex:
3905 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003906 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003907 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003908 *
3909 * parse a value for an attribute, this is the fallback function
3910 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003911 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003912 *
3913 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3914 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003915static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003916xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003917 xmlChar limit = 0;
3918 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003919 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003920 size_t len = 0;
3921 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003922 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003923 xmlChar *current = NULL;
3924 xmlEntityPtr ent;
3925
Owen Taylor3473f882001-02-23 17:55:21 +00003926 if (NXT(0) == '"') {
3927 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3928 limit = '"';
3929 NEXT;
3930 } else if (NXT(0) == '\'') {
3931 limit = '\'';
3932 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3933 NEXT;
3934 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003935 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003936 return(NULL);
3937 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003938
Owen Taylor3473f882001-02-23 17:55:21 +00003939 /*
3940 * allocate a translation buffer.
3941 */
3942 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003943 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003944 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003945
3946 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003947 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003948 */
3949 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003950 while (((NXT(0) != limit) && /* checked */
3951 (IS_CHAR(c)) && (c != '<')) &&
3952 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003953 /*
3954 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3955 * special option is given
3956 */
3957 if ((len > XML_MAX_TEXT_LENGTH) &&
3958 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3959 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003960 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003961 goto mem_error;
3962 }
Owen Taylor3473f882001-02-23 17:55:21 +00003963 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003964 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003965 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003966 if (NXT(1) == '#') {
3967 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003968
Owen Taylor3473f882001-02-23 17:55:21 +00003969 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003970 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003971 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003972 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003973 }
3974 buf[len++] = '&';
3975 } else {
3976 /*
3977 * The reparsing will be done in xmlStringGetNodeList()
3978 * called by the attribute() function in SAX.c
3979 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003980 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003981 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003982 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003983 buf[len++] = '&';
3984 buf[len++] = '#';
3985 buf[len++] = '3';
3986 buf[len++] = '8';
3987 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003988 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003989 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003990 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003991 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003992 }
Owen Taylor3473f882001-02-23 17:55:21 +00003993 len += xmlCopyChar(0, &buf[len], val);
3994 }
3995 } else {
3996 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003997 ctxt->nbentities++;
3998 if (ent != NULL)
3999 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004000 if ((ent != NULL) &&
4001 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004002 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004003 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004004 }
4005 if ((ctxt->replaceEntities == 0) &&
4006 (ent->content[0] == '&')) {
4007 buf[len++] = '&';
4008 buf[len++] = '#';
4009 buf[len++] = '3';
4010 buf[len++] = '8';
4011 buf[len++] = ';';
4012 } else {
4013 buf[len++] = ent->content[0];
4014 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004015 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004016 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004017 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4018 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004019 XML_SUBSTITUTE_REF,
4020 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004021 if (rep != NULL) {
4022 current = rep;
4023 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004024 if ((*current == 0xD) || (*current == 0xA) ||
4025 (*current == 0x9)) {
4026 buf[len++] = 0x20;
4027 current++;
4028 } else
4029 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004030 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004031 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004032 }
4033 }
4034 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004035 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004036 }
4037 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004038 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004039 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004040 }
Owen Taylor3473f882001-02-23 17:55:21 +00004041 if (ent->content != NULL)
4042 buf[len++] = ent->content[0];
4043 }
4044 } else if (ent != NULL) {
4045 int i = xmlStrlen(ent->name);
4046 const xmlChar *cur = ent->name;
4047
4048 /*
4049 * This may look absurd but is needed to detect
4050 * entities problems
4051 */
4052 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004053 (ent->content != NULL) && (ent->checked == 0)) {
4054 unsigned long oldnbent = ctxt->nbentities;
4055
Owen Taylor3473f882001-02-23 17:55:21 +00004056 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004057 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004058
Daniel Veillardcff25462013-03-11 15:57:55 +08004059 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004060 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004061 if (xmlStrchr(rep, '<'))
4062 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004063 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004064 rep = NULL;
4065 }
Owen Taylor3473f882001-02-23 17:55:21 +00004066 }
4067
4068 /*
4069 * Just output the reference
4070 */
4071 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004072 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004073 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004074 }
4075 for (;i > 0;i--)
4076 buf[len++] = *cur++;
4077 buf[len++] = ';';
4078 }
4079 }
4080 } else {
4081 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004082 if ((len != 0) || (!normalize)) {
4083 if ((!normalize) || (!in_space)) {
4084 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004085 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004086 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004087 }
4088 }
4089 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004090 }
4091 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004092 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004093 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004094 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004095 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004096 }
4097 }
4098 NEXTL(l);
4099 }
4100 GROW;
4101 c = CUR_CHAR(l);
4102 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004103 if (ctxt->instate == XML_PARSER_EOF)
4104 goto error;
4105
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004106 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004107 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004108 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004109 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004110 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004111 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004112 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004113 if ((c != 0) && (!IS_CHAR(c))) {
4114 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4115 "invalid character in attribute value\n");
4116 } else {
4117 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4118 "AttValue: ' expected\n");
4119 }
Owen Taylor3473f882001-02-23 17:55:21 +00004120 } else
4121 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004122
4123 /*
4124 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004125 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004126 */
4127 if (len >= INT_MAX) {
4128 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004129 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004130 goto mem_error;
4131 }
4132
4133 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004134 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004135
4136mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004137 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004138error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004139 if (buf != NULL)
4140 xmlFree(buf);
4141 if (rep != NULL)
4142 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004143 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004144}
4145
4146/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004147 * xmlParseAttValue:
4148 * @ctxt: an XML parser context
4149 *
4150 * parse a value for an attribute
4151 * Note: the parser won't do substitution of entities here, this
4152 * will be handled later in xmlStringGetNodeList
4153 *
4154 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4155 * "'" ([^<&'] | Reference)* "'"
4156 *
4157 * 3.3.3 Attribute-Value Normalization:
4158 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004159 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004160 * - a character reference is processed by appending the referenced
4161 * character to the attribute value
4162 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004163 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004164 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4165 * appending #x20 to the normalized value, except that only a single
4166 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004167 * parsed entity or the literal entity value of an internal parsed entity
4168 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004169 * If the declared value is not CDATA, then the XML processor must further
4170 * process the normalized attribute value by discarding any leading and
4171 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004172 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004173 * All attributes for which no declaration has been read should be treated
4174 * by a non-validating parser as if declared CDATA.
4175 *
4176 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4177 */
4178
4179
4180xmlChar *
4181xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004182 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004183 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004184}
4185
4186/**
Owen Taylor3473f882001-02-23 17:55:21 +00004187 * xmlParseSystemLiteral:
4188 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004189 *
Owen Taylor3473f882001-02-23 17:55:21 +00004190 * parse an XML Literal
4191 *
4192 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4193 *
4194 * Returns the SystemLiteral parsed or NULL
4195 */
4196
4197xmlChar *
4198xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4199 xmlChar *buf = NULL;
4200 int len = 0;
4201 int size = XML_PARSER_BUFFER_SIZE;
4202 int cur, l;
4203 xmlChar stop;
4204 int state = ctxt->instate;
4205 int count = 0;
4206
4207 SHRINK;
4208 if (RAW == '"') {
4209 NEXT;
4210 stop = '"';
4211 } else if (RAW == '\'') {
4212 NEXT;
4213 stop = '\'';
4214 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004215 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004216 return(NULL);
4217 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004218
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004219 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004220 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004221 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004222 return(NULL);
4223 }
4224 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4225 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004226 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004227 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004228 xmlChar *tmp;
4229
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004230 if ((size > XML_MAX_NAME_LENGTH) &&
4231 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4232 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4233 xmlFree(buf);
4234 ctxt->instate = (xmlParserInputState) state;
4235 return(NULL);
4236 }
Owen Taylor3473f882001-02-23 17:55:21 +00004237 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004238 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4239 if (tmp == NULL) {
4240 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004241 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004242 ctxt->instate = (xmlParserInputState) state;
4243 return(NULL);
4244 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004245 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004246 }
4247 count++;
4248 if (count > 50) {
4249 GROW;
4250 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004251 if (ctxt->instate == XML_PARSER_EOF) {
4252 xmlFree(buf);
4253 return(NULL);
4254 }
Owen Taylor3473f882001-02-23 17:55:21 +00004255 }
4256 COPY_BUF(l,buf,len,cur);
4257 NEXTL(l);
4258 cur = CUR_CHAR(l);
4259 if (cur == 0) {
4260 GROW;
4261 SHRINK;
4262 cur = CUR_CHAR(l);
4263 }
4264 }
4265 buf[len] = 0;
4266 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004267 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004268 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004269 } else {
4270 NEXT;
4271 }
4272 return(buf);
4273}
4274
4275/**
4276 * xmlParsePubidLiteral:
4277 * @ctxt: an XML parser context
4278 *
4279 * parse an XML public literal
4280 *
4281 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4282 *
4283 * Returns the PubidLiteral parsed or NULL.
4284 */
4285
4286xmlChar *
4287xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4288 xmlChar *buf = NULL;
4289 int len = 0;
4290 int size = XML_PARSER_BUFFER_SIZE;
4291 xmlChar cur;
4292 xmlChar stop;
4293 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004294 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004295
4296 SHRINK;
4297 if (RAW == '"') {
4298 NEXT;
4299 stop = '"';
4300 } else if (RAW == '\'') {
4301 NEXT;
4302 stop = '\'';
4303 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004304 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004305 return(NULL);
4306 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004307 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004308 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004309 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004310 return(NULL);
4311 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004312 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004313 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004314 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004315 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004316 xmlChar *tmp;
4317
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004318 if ((size > XML_MAX_NAME_LENGTH) &&
4319 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4320 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4321 xmlFree(buf);
4322 return(NULL);
4323 }
Owen Taylor3473f882001-02-23 17:55:21 +00004324 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004325 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4326 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004327 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004328 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004329 return(NULL);
4330 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004331 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004332 }
4333 buf[len++] = cur;
4334 count++;
4335 if (count > 50) {
4336 GROW;
4337 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004338 if (ctxt->instate == XML_PARSER_EOF) {
4339 xmlFree(buf);
4340 return(NULL);
4341 }
Owen Taylor3473f882001-02-23 17:55:21 +00004342 }
4343 NEXT;
4344 cur = CUR;
4345 if (cur == 0) {
4346 GROW;
4347 SHRINK;
4348 cur = CUR;
4349 }
4350 }
4351 buf[len] = 0;
4352 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004353 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004354 } else {
4355 NEXT;
4356 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004357 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004358 return(buf);
4359}
4360
Daniel Veillard8ed10722009-08-20 19:17:36 +02004361static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004362
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value: an entry equals its own index for the
 * bytes listed as themselves below and is 0x00 for every other byte, that
 * is all control characters except 0x9 (so 0xA and 0xD too), '&', '<', ']'
 * and every non-ASCII byte (0x80-0xFF, left to implicit zero
 * initialization).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x0F: only 0x9 is kept */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10-0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x2F: '&' (0x26) is zeroed */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30-0x3F: '<' (0x3C) is zeroed */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50-0x5F: ']' (0x5D) is zeroed */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70-0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80-0xFF: non-ascii, all 0x00 */
};
4400
Owen Taylor3473f882001-02-23 17:55:21 +00004401/**
4402 * xmlParseCharData:
4403 * @ctxt: an XML parser context
4404 * @cdata: int indicating whether we are within a CDATA section
4405 *
4406 * parse a CharData section.
4407 * if we are within a CDATA section ']]>' marks an end of section.
4408 *
4409 * The right angle bracket (>) may be represented using the string "&gt;",
4410 * and must, for compatibility, be escaped using "&gt;" or a character
4411 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004412 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004413 *
4414 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4415 */
4416
4417void
4418xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004419 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004420 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004421 int line = ctxt->input->line;
4422 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004423 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004424
4425 SHRINK;
4426 GROW;
4427 /*
4428 * Accelerated common case where input don't need to be
4429 * modified before passing it to the handler.
4430 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004431 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004432 in = ctxt->input->cur;
4433 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004434get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004435 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004436 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004437 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004438 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004439 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004440 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004441 goto get_more_space;
4442 }
4443 if (*in == '<') {
4444 nbchar = in - ctxt->input->cur;
4445 if (nbchar > 0) {
4446 const xmlChar *tmp = ctxt->input->cur;
4447 ctxt->input->cur = in;
4448
Daniel Veillard34099b42004-11-04 17:34:35 +00004449 if ((ctxt->sax != NULL) &&
4450 (ctxt->sax->ignorableWhitespace !=
4451 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004452 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004453 if (ctxt->sax->ignorableWhitespace != NULL)
4454 ctxt->sax->ignorableWhitespace(ctxt->userData,
4455 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004456 } else {
4457 if (ctxt->sax->characters != NULL)
4458 ctxt->sax->characters(ctxt->userData,
4459 tmp, nbchar);
4460 if (*ctxt->space == -1)
4461 *ctxt->space = -2;
4462 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004463 } else if ((ctxt->sax != NULL) &&
4464 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004465 ctxt->sax->characters(ctxt->userData,
4466 tmp, nbchar);
4467 }
4468 }
4469 return;
4470 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004471
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004472get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004473 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004474 while (test_char_data[*in]) {
4475 in++;
4476 ccol++;
4477 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004478 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004479 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004480 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004481 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004482 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004483 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004484 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004485 }
4486 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004487 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004488 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004489 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004490 return;
4491 }
4492 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004493 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004494 goto get_more;
4495 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004496 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004497 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004498 if ((ctxt->sax != NULL) &&
4499 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004500 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004501 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004502 const xmlChar *tmp = ctxt->input->cur;
4503 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004504
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004505 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004506 if (ctxt->sax->ignorableWhitespace != NULL)
4507 ctxt->sax->ignorableWhitespace(ctxt->userData,
4508 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004509 } else {
4510 if (ctxt->sax->characters != NULL)
4511 ctxt->sax->characters(ctxt->userData,
4512 tmp, nbchar);
4513 if (*ctxt->space == -1)
4514 *ctxt->space = -2;
4515 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004516 line = ctxt->input->line;
4517 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004518 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004519 if (ctxt->sax->characters != NULL)
4520 ctxt->sax->characters(ctxt->userData,
4521 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004522 line = ctxt->input->line;
4523 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004524 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004525 /* something really bad happened in the SAX callback */
4526 if (ctxt->instate != XML_PARSER_CONTENT)
4527 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004528 }
4529 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004530 if (*in == 0xD) {
4531 in++;
4532 if (*in == 0xA) {
4533 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004534 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004535 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004536 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004537 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004538 in--;
4539 }
4540 if (*in == '<') {
4541 return;
4542 }
4543 if (*in == '&') {
4544 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004545 }
4546 SHRINK;
4547 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004548 if (ctxt->instate == XML_PARSER_EOF)
4549 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004550 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004551 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004552 nbchar = 0;
4553 }
Daniel Veillard50582112001-03-26 22:52:16 +00004554 ctxt->input->line = line;
4555 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004556 xmlParseCharDataComplex(ctxt, cdata);
4557}
4558
Daniel Veillard01c13b52002-12-10 15:19:08 +00004559/**
4560 * xmlParseCharDataComplex:
4561 * @ctxt: an XML parser context
4562 * @cdata: int indicating whether we are within a CDATA section
4563 *
4564 * parse a CharData section.this is the fallback function
4565 * of xmlParseCharData() when the parsing requires handling
4566 * of non-ASCII characters.
4567 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004568static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004569xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004570 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4571 int nbchar = 0;
4572 int cur, l;
4573 int count = 0;
4574
4575 SHRINK;
4576 GROW;
4577 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004578 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004579 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004580 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004581 if ((cur == ']') && (NXT(1) == ']') &&
4582 (NXT(2) == '>')) {
4583 if (cdata) break;
4584 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004585 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004586 }
4587 }
4588 COPY_BUF(l,buf,nbchar,cur);
4589 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004590 buf[nbchar] = 0;
4591
Owen Taylor3473f882001-02-23 17:55:21 +00004592 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004593 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004594 */
4595 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004596 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004597 if (ctxt->sax->ignorableWhitespace != NULL)
4598 ctxt->sax->ignorableWhitespace(ctxt->userData,
4599 buf, nbchar);
4600 } else {
4601 if (ctxt->sax->characters != NULL)
4602 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004603 if ((ctxt->sax->characters !=
4604 ctxt->sax->ignorableWhitespace) &&
4605 (*ctxt->space == -1))
4606 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004607 }
4608 }
4609 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004610 /* something really bad happened in the SAX callback */
4611 if (ctxt->instate != XML_PARSER_CONTENT)
4612 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004613 }
4614 count++;
4615 if (count > 50) {
4616 GROW;
4617 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004618 if (ctxt->instate == XML_PARSER_EOF)
4619 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004620 }
4621 NEXTL(l);
4622 cur = CUR_CHAR(l);
4623 }
4624 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004625 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004626 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004627 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004628 */
4629 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004630 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004631 if (ctxt->sax->ignorableWhitespace != NULL)
4632 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4633 } else {
4634 if (ctxt->sax->characters != NULL)
4635 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004636 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4637 (*ctxt->space == -1))
4638 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004639 }
4640 }
4641 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004642 if ((cur != 0) && (!IS_CHAR(cur))) {
4643 /* Generate the error and skip the offending character */
4644 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4645 "PCDATA invalid Char value %d\n",
4646 cur);
4647 NEXTL(l);
4648 }
Owen Taylor3473f882001-02-23 17:55:21 +00004649}
4650
4651/**
4652 * xmlParseExternalID:
4653 * @ctxt: an XML parser context
4654 * @publicID: a xmlChar** receiving PubidLiteral
4655 * @strict: indicate whether we should restrict parsing to only
4656 * production [75], see NOTE below
4657 *
4658 * Parse an External ID or a Public ID
4659 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004660 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004661 * 'PUBLIC' S PubidLiteral S SystemLiteral
4662 *
4663 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4664 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4665 *
4666 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4667 *
4668 * Returns the function returns SystemLiteral and in the second
4669 * case publicID receives PubidLiteral, is strict is off
4670 * it is possible to return NULL and have publicID set.
4671 */
4672
4673xmlChar *
4674xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4675 xmlChar *URI = NULL;
4676
4677 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004678
4679 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004680 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004681 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004682 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004683 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4684 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004685 }
4686 SKIP_BLANKS;
4687 URI = xmlParseSystemLiteral(ctxt);
4688 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004689 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004690 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004691 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004692 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004693 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004694 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004695 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004696 }
4697 SKIP_BLANKS;
4698 *publicID = xmlParsePubidLiteral(ctxt);
4699 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004700 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004701 }
4702 if (strict) {
4703 /*
4704 * We don't handle [83] so "S SystemLiteral" is required.
4705 */
William M. Brack76e95df2003-10-18 16:20:14 +00004706 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004707 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004708 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004709 }
4710 } else {
4711 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004712 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004713 * "S SystemLiteral" is not detected. From a purely parsing
4714 * point of view that's a nice mess.
4715 */
4716 const xmlChar *ptr;
4717 GROW;
4718
4719 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004720 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004721
William M. Brack76e95df2003-10-18 16:20:14 +00004722 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004723 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4724 }
4725 SKIP_BLANKS;
4726 URI = xmlParseSystemLiteral(ctxt);
4727 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004728 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004729 }
4730 }
4731 return(URI);
4732}
4733
4734/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004735 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004736 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004737 * @buf: the already parsed part of the buffer
4738 * @len: number of bytes filles in the buffer
4739 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004740 *
4741 * Skip an XML (SGML) comment <!-- .... -->
4742 * The spec says that "For compatibility, the string "--" (double-hyphen)
4743 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004744 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004745 *
4746 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4747 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004748static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004749xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4750 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004751 int q, ql;
4752 int r, rl;
4753 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004754 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004755 int inputid;
4756
4757 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004758
Owen Taylor3473f882001-02-23 17:55:21 +00004759 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004760 len = 0;
4761 size = XML_PARSER_BUFFER_SIZE;
4762 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4763 if (buf == NULL) {
4764 xmlErrMemory(ctxt, NULL);
4765 return;
4766 }
Owen Taylor3473f882001-02-23 17:55:21 +00004767 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004768 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004769 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004770 if (q == 0)
4771 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004772 if (!IS_CHAR(q)) {
4773 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4774 "xmlParseComment: invalid xmlChar value %d\n",
4775 q);
4776 xmlFree (buf);
4777 return;
4778 }
Owen Taylor3473f882001-02-23 17:55:21 +00004779 NEXTL(ql);
4780 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004781 if (r == 0)
4782 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004783 if (!IS_CHAR(r)) {
4784 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4785 "xmlParseComment: invalid xmlChar value %d\n",
4786 q);
4787 xmlFree (buf);
4788 return;
4789 }
Owen Taylor3473f882001-02-23 17:55:21 +00004790 NEXTL(rl);
4791 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004792 if (cur == 0)
4793 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004794 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004795 ((cur != '>') ||
4796 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004797 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004798 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004799 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004800 if ((len > XML_MAX_TEXT_LENGTH) &&
4801 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4802 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4803 "Comment too big found", NULL);
4804 xmlFree (buf);
4805 return;
4806 }
Owen Taylor3473f882001-02-23 17:55:21 +00004807 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004808 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004809 size_t new_size;
4810
4811 new_size = size * 2;
4812 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004813 if (new_buf == NULL) {
4814 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004815 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004816 return;
4817 }
William M. Bracka3215c72004-07-31 16:24:01 +00004818 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004819 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004820 }
4821 COPY_BUF(ql,buf,len,q);
4822 q = r;
4823 ql = rl;
4824 r = cur;
4825 rl = l;
4826
4827 count++;
4828 if (count > 50) {
4829 GROW;
4830 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004831 if (ctxt->instate == XML_PARSER_EOF) {
4832 xmlFree(buf);
4833 return;
4834 }
Owen Taylor3473f882001-02-23 17:55:21 +00004835 }
4836 NEXTL(l);
4837 cur = CUR_CHAR(l);
4838 if (cur == 0) {
4839 SHRINK;
4840 GROW;
4841 cur = CUR_CHAR(l);
4842 }
4843 }
4844 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004845 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004846 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004847 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004848 } else if (!IS_CHAR(cur)) {
4849 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4850 "xmlParseComment: invalid xmlChar value %d\n",
4851 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004852 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004853 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004854 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4855 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004856 }
4857 NEXT;
4858 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4859 (!ctxt->disableSAX))
4860 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004861 }
Daniel Veillardda629342007-08-01 07:49:06 +00004862 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004863 return;
4864not_terminated:
4865 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4866 "Comment not terminated\n", NULL);
4867 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004868 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004869}
Daniel Veillardda629342007-08-01 07:49:06 +00004870
Daniel Veillard4c778d82005-01-23 17:37:44 +00004871/**
4872 * xmlParseComment:
4873 * @ctxt: an XML parser context
4874 *
4875 * Skip an XML (SGML) comment <!-- .... -->
4876 * The spec says that "For compatibility, the string "--" (double-hyphen)
4877 * must not occur within comments. "
4878 *
4879 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4880 */
4881void
4882xmlParseComment(xmlParserCtxtPtr ctxt) {
4883 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004884 size_t size = XML_PARSER_BUFFER_SIZE;
4885 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004886 xmlParserInputState state;
4887 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004888 size_t nbchar = 0;
4889 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004890 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004891
4892 /*
4893 * Check that there is a comment right here.
4894 */
4895 if ((RAW != '<') || (NXT(1) != '!') ||
4896 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004897 state = ctxt->instate;
4898 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004899 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004900 SKIP(4);
4901 SHRINK;
4902 GROW;
4903
4904 /*
4905 * Accelerated common case where input don't need to be
4906 * modified before passing it to the handler.
4907 */
4908 in = ctxt->input->cur;
4909 do {
4910 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004911 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004912 ctxt->input->line++; ctxt->input->col = 1;
4913 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004914 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004915 }
4916get_more:
4917 ccol = ctxt->input->col;
4918 while (((*in > '-') && (*in <= 0x7F)) ||
4919 ((*in >= 0x20) && (*in < '-')) ||
4920 (*in == 0x09)) {
4921 in++;
4922 ccol++;
4923 }
4924 ctxt->input->col = ccol;
4925 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004926 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004927 ctxt->input->line++; ctxt->input->col = 1;
4928 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004929 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004930 goto get_more;
4931 }
4932 nbchar = in - ctxt->input->cur;
4933 /*
4934 * save current set of data
4935 */
4936 if (nbchar > 0) {
4937 if ((ctxt->sax != NULL) &&
4938 (ctxt->sax->comment != NULL)) {
4939 if (buf == NULL) {
4940 if ((*in == '-') && (in[1] == '-'))
4941 size = nbchar + 1;
4942 else
4943 size = XML_PARSER_BUFFER_SIZE + nbchar;
4944 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4945 if (buf == NULL) {
4946 xmlErrMemory(ctxt, NULL);
4947 ctxt->instate = state;
4948 return;
4949 }
4950 len = 0;
4951 } else if (len + nbchar + 1 >= size) {
4952 xmlChar *new_buf;
4953 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4954 new_buf = (xmlChar *) xmlRealloc(buf,
4955 size * sizeof(xmlChar));
4956 if (new_buf == NULL) {
4957 xmlFree (buf);
4958 xmlErrMemory(ctxt, NULL);
4959 ctxt->instate = state;
4960 return;
4961 }
4962 buf = new_buf;
4963 }
4964 memcpy(&buf[len], ctxt->input->cur, nbchar);
4965 len += nbchar;
4966 buf[len] = 0;
4967 }
4968 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004969 if ((len > XML_MAX_TEXT_LENGTH) &&
4970 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4971 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4972 "Comment too big found", NULL);
4973 xmlFree (buf);
4974 return;
4975 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004976 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004977 if (*in == 0xA) {
4978 in++;
4979 ctxt->input->line++; ctxt->input->col = 1;
4980 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004981 if (*in == 0xD) {
4982 in++;
4983 if (*in == 0xA) {
4984 ctxt->input->cur = in;
4985 in++;
4986 ctxt->input->line++; ctxt->input->col = 1;
4987 continue; /* while */
4988 }
4989 in--;
4990 }
4991 SHRINK;
4992 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004993 if (ctxt->instate == XML_PARSER_EOF) {
4994 xmlFree(buf);
4995 return;
4996 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004997 in = ctxt->input->cur;
4998 if (*in == '-') {
4999 if (in[1] == '-') {
5000 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005001 if (ctxt->input->id != inputid) {
5002 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5003 "comment doesn't start and stop in the same entity\n");
5004 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005005 SKIP(3);
5006 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5007 (!ctxt->disableSAX)) {
5008 if (buf != NULL)
5009 ctxt->sax->comment(ctxt->userData, buf);
5010 else
5011 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5012 }
5013 if (buf != NULL)
5014 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005015 if (ctxt->instate != XML_PARSER_EOF)
5016 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005017 return;
5018 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005019 if (buf != NULL) {
5020 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5021 "Double hyphen within comment: "
5022 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005023 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005024 } else
5025 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5026 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005027 in++;
5028 ctxt->input->col++;
5029 }
5030 in++;
5031 ctxt->input->col++;
5032 goto get_more;
5033 }
5034 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5035 xmlParseCommentComplex(ctxt, buf, len, size);
5036 ctxt->instate = state;
5037 return;
5038}
5039
Owen Taylor3473f882001-02-23 17:55:21 +00005040
5041/**
5042 * xmlParsePITarget:
5043 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005044 *
Owen Taylor3473f882001-02-23 17:55:21 +00005045 * parse the name of a PI
5046 *
5047 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5048 *
5049 * Returns the PITarget name or NULL
5050 */
5051
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005052const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005053xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005054 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005055
5056 name = xmlParseName(ctxt);
5057 if ((name != NULL) &&
5058 ((name[0] == 'x') || (name[0] == 'X')) &&
5059 ((name[1] == 'm') || (name[1] == 'M')) &&
5060 ((name[2] == 'l') || (name[2] == 'L'))) {
5061 int i;
5062 if ((name[0] == 'x') && (name[1] == 'm') &&
5063 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005064 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005065 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005066 return(name);
5067 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005068 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005069 return(name);
5070 }
5071 for (i = 0;;i++) {
5072 if (xmlW3CPIs[i] == NULL) break;
5073 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5074 return(name);
5075 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005076 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5077 "xmlParsePITarget: invalid name prefix 'xml'\n",
5078 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005079 }
Daniel Veillard37334572008-07-31 08:20:02 +00005080 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005081 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005082 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5083 }
Owen Taylor3473f882001-02-23 17:55:21 +00005084 return(name);
5085}
5086
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The catalog named by the PI is added to the parsing context so that
 * it can be used if a resolution is needed further down in the document.
 *
 * The value must read: catalog, '=', then a URL between matching single
 * or double quotes, with optional blanks around each part and nothing
 * else after the closing quote.  A missing '=' makes the function
 * return silently; any other deviation raises the XML_WAR_CATALOG_PI
 * warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *cursor = catalog;
    const xmlChar *valueStart;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7))
        goto error;
    cursor += 7;

    /* '=' sign */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != '=')
        return;
    cursor++;

    /* opening quote */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    cursor++;

    /* value up to the matching quote */
    valueStart = cursor;
    while ((*cursor != 0) && (*cursor != quote))
        cursor++;
    if (*cursor == 0)
        goto error;
    url = xmlStrndup(valueStart, cursor - valueStart);
    cursor++;

    /* only blanks may follow */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5148
Owen Taylor3473f882001-02-23 17:55:21 +00005149/**
5150 * xmlParsePI:
5151 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005152 *
Owen Taylor3473f882001-02-23 17:55:21 +00005153 * parse an XML Processing Instruction.
5154 *
5155 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5156 *
5157 * The processing is transfered to SAX once parsed.
5158 */
5159
5160void
5161xmlParsePI(xmlParserCtxtPtr ctxt) {
5162 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005163 size_t len = 0;
5164 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005165 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005166 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005167 xmlParserInputState state;
5168 int count = 0;
5169
5170 if ((RAW == '<') && (NXT(1) == '?')) {
5171 xmlParserInputPtr input = ctxt->input;
5172 state = ctxt->instate;
5173 ctxt->instate = XML_PARSER_PI;
5174 /*
5175 * this is a Processing Instruction.
5176 */
5177 SKIP(2);
5178 SHRINK;
5179
5180 /*
5181 * Parse the target name and check for special support like
5182 * namespace.
5183 */
5184 target = xmlParsePITarget(ctxt);
5185 if (target != NULL) {
5186 if ((RAW == '?') && (NXT(1) == '>')) {
5187 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005188 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5189 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005190 }
5191 SKIP(2);
5192
5193 /*
5194 * SAX: PI detected.
5195 */
5196 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5197 (ctxt->sax->processingInstruction != NULL))
5198 ctxt->sax->processingInstruction(ctxt->userData,
5199 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005200 if (ctxt->instate != XML_PARSER_EOF)
5201 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005202 return;
5203 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005204 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005205 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005206 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005207 ctxt->instate = state;
5208 return;
5209 }
5210 cur = CUR;
5211 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005212 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5213 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005214 }
5215 SKIP_BLANKS;
5216 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005217 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005218 ((cur != '?') || (NXT(1) != '>'))) {
5219 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005220 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005221 size_t new_size = size * 2;
5222 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005223 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005224 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005225 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 ctxt->instate = state;
5227 return;
5228 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005229 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005230 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005231 }
5232 count++;
5233 if (count > 50) {
5234 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005235 if (ctxt->instate == XML_PARSER_EOF) {
5236 xmlFree(buf);
5237 return;
5238 }
Owen Taylor3473f882001-02-23 17:55:21 +00005239 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005240 if ((len > XML_MAX_TEXT_LENGTH) &&
5241 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5242 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5243 "PI %s too big found", target);
5244 xmlFree(buf);
5245 ctxt->instate = state;
5246 return;
5247 }
Owen Taylor3473f882001-02-23 17:55:21 +00005248 }
5249 COPY_BUF(l,buf,len,cur);
5250 NEXTL(l);
5251 cur = CUR_CHAR(l);
5252 if (cur == 0) {
5253 SHRINK;
5254 GROW;
5255 cur = CUR_CHAR(l);
5256 }
5257 }
Daniel Veillard51304812012-07-19 20:34:26 +08005258 if ((len > XML_MAX_TEXT_LENGTH) &&
5259 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5260 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5261 "PI %s too big found", target);
5262 xmlFree(buf);
5263 ctxt->instate = state;
5264 return;
5265 }
Owen Taylor3473f882001-02-23 17:55:21 +00005266 buf[len] = 0;
5267 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005268 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5269 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 } else {
5271 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005272 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5273 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005274 }
5275 SKIP(2);
5276
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005277#ifdef LIBXML_CATALOG_ENABLED
5278 if (((state == XML_PARSER_MISC) ||
5279 (state == XML_PARSER_START)) &&
5280 (xmlStrEqual(target, XML_CATALOG_PI))) {
5281 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5282 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5283 (allow == XML_CATA_ALLOW_ALL))
5284 xmlParseCatalogPI(ctxt, buf);
5285 }
5286#endif
5287
5288
Owen Taylor3473f882001-02-23 17:55:21 +00005289 /*
5290 * SAX: PI detected.
5291 */
5292 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5293 (ctxt->sax->processingInstruction != NULL))
5294 ctxt->sax->processingInstruction(ctxt->userData,
5295 target, buf);
5296 }
5297 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005298 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005299 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005300 }
Chris Evans77404b82011-12-14 16:18:25 +08005301 if (ctxt->instate != XML_PARSER_EOF)
5302 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005303 }
5304}
5305
5306/**
5307 * xmlParseNotationDecl:
5308 * @ctxt: an XML parser context
5309 *
5310 * parse a notation declaration
5311 *
5312 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5313 *
5314 * Hence there is actually 3 choices:
5315 * 'PUBLIC' S PubidLiteral
5316 * 'PUBLIC' S PubidLiteral S SystemLiteral
5317 * and 'SYSTEM' S SystemLiteral
5318 *
5319 * See the NOTE on xmlParseExternalID().
5320 */
5321
5322void
5323xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005324 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005325 xmlChar *Pubid;
5326 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005327
Daniel Veillarda07050d2003-10-19 14:46:32 +00005328 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005329 xmlParserInputPtr input = ctxt->input;
5330 SHRINK;
5331 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005332 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005333 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5334 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005335 return;
5336 }
5337 SKIP_BLANKS;
5338
Daniel Veillard76d66f42001-05-16 21:05:17 +00005339 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005340 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005341 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005342 return;
5343 }
William M. Brack76e95df2003-10-18 16:20:14 +00005344 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005345 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005346 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005347 return;
5348 }
Daniel Veillard37334572008-07-31 08:20:02 +00005349 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005350 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005351 "colon are forbidden from notation names '%s'\n",
5352 name, NULL, NULL);
5353 }
Owen Taylor3473f882001-02-23 17:55:21 +00005354 SKIP_BLANKS;
5355
5356 /*
5357 * Parse the IDs.
5358 */
5359 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5360 SKIP_BLANKS;
5361
5362 if (RAW == '>') {
5363 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005364 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5365 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005366 }
5367 NEXT;
5368 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5369 (ctxt->sax->notationDecl != NULL))
5370 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5371 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005372 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005373 }
Owen Taylor3473f882001-02-23 17:55:21 +00005374 if (Systemid != NULL) xmlFree(Systemid);
5375 if (Pubid != NULL) xmlFree(Pubid);
5376 }
5377}
5378
5379/**
5380 * xmlParseEntityDecl:
5381 * @ctxt: an XML parser context
5382 *
5383 * parse <!ENTITY declarations
5384 *
5385 * [70] EntityDecl ::= GEDecl | PEDecl
5386 *
5387 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5388 *
5389 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5390 *
5391 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5392 *
5393 * [74] PEDef ::= EntityValue | ExternalID
5394 *
5395 * [76] NDataDecl ::= S 'NDATA' S Name
5396 *
5397 * [ VC: Notation Declared ]
5398 * The Name must match the declared name of a notation.
5399 */
5400
5401void
5402xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005403 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005404 xmlChar *value = NULL;
5405 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005406 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005407 int isParameter = 0;
5408 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005409 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005410
Daniel Veillard4c778d82005-01-23 17:37:44 +00005411 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005412 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005413 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005414 SHRINK;
5415 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005416 skipped = SKIP_BLANKS;
5417 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005418 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5419 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005420 }
Owen Taylor3473f882001-02-23 17:55:21 +00005421
5422 if (RAW == '%') {
5423 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005424 skipped = SKIP_BLANKS;
5425 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005426 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5427 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005428 }
Owen Taylor3473f882001-02-23 17:55:21 +00005429 isParameter = 1;
5430 }
5431
Daniel Veillard76d66f42001-05-16 21:05:17 +00005432 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005433 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005434 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5435 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005436 return;
5437 }
Daniel Veillard37334572008-07-31 08:20:02 +00005438 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005439 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005440 "colon are forbidden from entities names '%s'\n",
5441 name, NULL, NULL);
5442 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005443 skipped = SKIP_BLANKS;
5444 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005445 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5446 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005447 }
Owen Taylor3473f882001-02-23 17:55:21 +00005448
Daniel Veillardf5582f12002-06-11 10:08:16 +00005449 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005450 /*
5451 * handle the various case of definitions...
5452 */
5453 if (isParameter) {
5454 if ((RAW == '"') || (RAW == '\'')) {
5455 value = xmlParseEntityValue(ctxt, &orig);
5456 if (value) {
5457 if ((ctxt->sax != NULL) &&
5458 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5459 ctxt->sax->entityDecl(ctxt->userData, name,
5460 XML_INTERNAL_PARAMETER_ENTITY,
5461 NULL, NULL, value);
5462 }
5463 } else {
5464 URI = xmlParseExternalID(ctxt, &literal, 1);
5465 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005466 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005467 }
5468 if (URI) {
5469 xmlURIPtr uri;
5470
5471 uri = xmlParseURI((const char *) URI);
5472 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005473 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5474 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005475 /*
5476 * This really ought to be a well formedness error
5477 * but the XML Core WG decided otherwise c.f. issue
5478 * E26 of the XML erratas.
5479 */
Owen Taylor3473f882001-02-23 17:55:21 +00005480 } else {
5481 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005482 /*
5483 * Okay this is foolish to block those but not
5484 * invalid URIs.
5485 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005486 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005487 } else {
5488 if ((ctxt->sax != NULL) &&
5489 (!ctxt->disableSAX) &&
5490 (ctxt->sax->entityDecl != NULL))
5491 ctxt->sax->entityDecl(ctxt->userData, name,
5492 XML_EXTERNAL_PARAMETER_ENTITY,
5493 literal, URI, NULL);
5494 }
5495 xmlFreeURI(uri);
5496 }
5497 }
5498 }
5499 } else {
5500 if ((RAW == '"') || (RAW == '\'')) {
5501 value = xmlParseEntityValue(ctxt, &orig);
5502 if ((ctxt->sax != NULL) &&
5503 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5504 ctxt->sax->entityDecl(ctxt->userData, name,
5505 XML_INTERNAL_GENERAL_ENTITY,
5506 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005507 /*
5508 * For expat compatibility in SAX mode.
5509 */
5510 if ((ctxt->myDoc == NULL) ||
5511 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5512 if (ctxt->myDoc == NULL) {
5513 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005514 if (ctxt->myDoc == NULL) {
5515 xmlErrMemory(ctxt, "New Doc failed");
5516 return;
5517 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005518 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005519 }
5520 if (ctxt->myDoc->intSubset == NULL)
5521 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5522 BAD_CAST "fake", NULL, NULL);
5523
Daniel Veillard1af9a412003-08-20 22:54:39 +00005524 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5525 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005526 }
Owen Taylor3473f882001-02-23 17:55:21 +00005527 } else {
5528 URI = xmlParseExternalID(ctxt, &literal, 1);
5529 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005530 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005531 }
5532 if (URI) {
5533 xmlURIPtr uri;
5534
5535 uri = xmlParseURI((const char *)URI);
5536 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005537 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5538 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005539 /*
5540 * This really ought to be a well formedness error
5541 * but the XML Core WG decided otherwise c.f. issue
5542 * E26 of the XML erratas.
5543 */
Owen Taylor3473f882001-02-23 17:55:21 +00005544 } else {
5545 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005546 /*
5547 * Okay this is foolish to block those but not
5548 * invalid URIs.
5549 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005550 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005551 }
5552 xmlFreeURI(uri);
5553 }
5554 }
William M. Brack76e95df2003-10-18 16:20:14 +00005555 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005556 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5557 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005558 }
5559 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005560 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005561 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005562 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005563 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5564 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
5566 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005567 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005568 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5569 (ctxt->sax->unparsedEntityDecl != NULL))
5570 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5571 literal, URI, ndata);
5572 } else {
5573 if ((ctxt->sax != NULL) &&
5574 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5575 ctxt->sax->entityDecl(ctxt->userData, name,
5576 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5577 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005578 /*
5579 * For expat compatibility in SAX mode.
5580 * assuming the entity repalcement was asked for
5581 */
5582 if ((ctxt->replaceEntities != 0) &&
5583 ((ctxt->myDoc == NULL) ||
5584 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5585 if (ctxt->myDoc == NULL) {
5586 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005587 if (ctxt->myDoc == NULL) {
5588 xmlErrMemory(ctxt, "New Doc failed");
5589 return;
5590 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005591 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005592 }
5593
5594 if (ctxt->myDoc->intSubset == NULL)
5595 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005597 xmlSAX2EntityDecl(ctxt, name,
5598 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5599 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005600 }
Owen Taylor3473f882001-02-23 17:55:21 +00005601 }
5602 }
5603 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005604 if (ctxt->instate == XML_PARSER_EOF)
5605 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005606 SKIP_BLANKS;
5607 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005608 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005609 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005610 } else {
5611 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005612 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5613 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005614 }
5615 NEXT;
5616 }
5617 if (orig != NULL) {
5618 /*
5619 * Ugly mechanism to save the raw entity value.
5620 */
5621 xmlEntityPtr cur = NULL;
5622
5623 if (isParameter) {
5624 if ((ctxt->sax != NULL) &&
5625 (ctxt->sax->getParameterEntity != NULL))
5626 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5627 } else {
5628 if ((ctxt->sax != NULL) &&
5629 (ctxt->sax->getEntity != NULL))
5630 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005631 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005632 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005633 }
Owen Taylor3473f882001-02-23 17:55:21 +00005634 }
5635 if (cur != NULL) {
5636 if (cur->orig != NULL)
5637 xmlFree(orig);
5638 else
5639 cur->orig = orig;
5640 } else
5641 xmlFree(orig);
5642 }
Owen Taylor3473f882001-02-23 17:55:21 +00005643 if (value != NULL) xmlFree(value);
5644 if (URI != NULL) xmlFree(URI);
5645 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005646 }
5647}
5648
5649/**
5650 * xmlParseDefaultDecl:
5651 * @ctxt: an XML parser context
5652 * @value: Receive a possible fixed default value for the attribute
5653 *
5654 * Parse an attribute default declaration
5655 *
5656 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5657 *
5658 * [ VC: Required Attribute ]
5659 * if the default declaration is the keyword #REQUIRED, then the
5660 * attribute must be specified for all elements of the type in the
5661 * attribute-list declaration.
5662 *
5663 * [ VC: Attribute Default Legal ]
5664 * The declared default value must meet the lexical constraints of
5665 * the declared attribute type c.f. xmlValidateAttributeDecl()
5666 *
5667 * [ VC: Fixed Attribute Default ]
5668 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005669 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005670 *
5671 * [ WFC: No < in Attribute Values ]
5672 * handled in xmlParseAttValue()
5673 *
5674 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005675 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005676 */
5677
5678int
5679xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5680 int val;
5681 xmlChar *ret;
5682
5683 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005684 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005685 SKIP(9);
5686 return(XML_ATTRIBUTE_REQUIRED);
5687 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005688 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005689 SKIP(8);
5690 return(XML_ATTRIBUTE_IMPLIED);
5691 }
5692 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005693 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005694 SKIP(6);
5695 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005696 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005697 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5698 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005699 }
5700 SKIP_BLANKS;
5701 }
5702 ret = xmlParseAttValue(ctxt);
5703 ctxt->instate = XML_PARSER_DTD;
5704 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005705 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005706 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005707 } else
5708 *value = ret;
5709 return(val);
5710}
5711
5712/**
5713 * xmlParseNotationType:
5714 * @ctxt: an XML parser context
5715 *
5716 * parse an Notation attribute type.
5717 *
5718 * Note: the leading 'NOTATION' S part has already being parsed...
5719 *
5720 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5721 *
5722 * [ VC: Notation Attributes ]
5723 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005724 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005725 *
5726 * Returns: the notation attribute tree built while parsing
5727 */
5728
5729xmlEnumerationPtr
5730xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005731 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005732 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005733
5734 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005735 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005736 return(NULL);
5737 }
5738 SHRINK;
5739 do {
5740 NEXT;
5741 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005742 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005743 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005744 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5745 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005746 xmlFreeEnumeration(ret);
5747 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005748 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005749 tmp = ret;
5750 while (tmp != NULL) {
5751 if (xmlStrEqual(name, tmp->name)) {
5752 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5753 "standalone: attribute notation value token %s duplicated\n",
5754 name, NULL);
5755 if (!xmlDictOwns(ctxt->dict, name))
5756 xmlFree((xmlChar *) name);
5757 break;
5758 }
5759 tmp = tmp->next;
5760 }
5761 if (tmp == NULL) {
5762 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005763 if (cur == NULL) {
5764 xmlFreeEnumeration(ret);
5765 return(NULL);
5766 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005767 if (last == NULL) ret = last = cur;
5768 else {
5769 last->next = cur;
5770 last = cur;
5771 }
Owen Taylor3473f882001-02-23 17:55:21 +00005772 }
5773 SKIP_BLANKS;
5774 } while (RAW == '|');
5775 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005776 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005777 xmlFreeEnumeration(ret);
5778 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005779 }
5780 NEXT;
5781 return(ret);
5782}
5783
5784/**
5785 * xmlParseEnumerationType:
5786 * @ctxt: an XML parser context
5787 *
5788 * parse an Enumeration attribute type.
5789 *
5790 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5791 *
5792 * [ VC: Enumeration ]
5793 * Values of this type must match one of the Nmtoken tokens in
5794 * the declaration
5795 *
5796 * Returns: the enumeration attribute tree built while parsing
5797 */
5798
5799xmlEnumerationPtr
5800xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5801 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005802 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005803
5804 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005805 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005806 return(NULL);
5807 }
5808 SHRINK;
5809 do {
5810 NEXT;
5811 SKIP_BLANKS;
5812 name = xmlParseNmtoken(ctxt);
5813 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005814 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005815 return(ret);
5816 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005817 tmp = ret;
5818 while (tmp != NULL) {
5819 if (xmlStrEqual(name, tmp->name)) {
5820 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5821 "standalone: attribute enumeration value token %s duplicated\n",
5822 name, NULL);
5823 if (!xmlDictOwns(ctxt->dict, name))
5824 xmlFree(name);
5825 break;
5826 }
5827 tmp = tmp->next;
5828 }
5829 if (tmp == NULL) {
5830 cur = xmlCreateEnumeration(name);
5831 if (!xmlDictOwns(ctxt->dict, name))
5832 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005833 if (cur == NULL) {
5834 xmlFreeEnumeration(ret);
5835 return(NULL);
5836 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005837 if (last == NULL) ret = last = cur;
5838 else {
5839 last->next = cur;
5840 last = cur;
5841 }
Owen Taylor3473f882001-02-23 17:55:21 +00005842 }
5843 SKIP_BLANKS;
5844 } while (RAW == '|');
5845 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005846 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005847 return(ret);
5848 }
5849 NEXT;
5850 return(ret);
5851}
5852
5853/**
5854 * xmlParseEnumeratedType:
5855 * @ctxt: an XML parser context
5856 * @tree: the enumeration tree built while parsing
5857 *
5858 * parse an Enumerated attribute type.
5859 *
5860 * [57] EnumeratedType ::= NotationType | Enumeration
5861 *
5862 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5863 *
5864 *
5865 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5866 */
5867
5868int
5869xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005870 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005871 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005872 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005873 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5874 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005875 return(0);
5876 }
5877 SKIP_BLANKS;
5878 *tree = xmlParseNotationType(ctxt);
5879 if (*tree == NULL) return(0);
5880 return(XML_ATTRIBUTE_NOTATION);
5881 }
5882 *tree = xmlParseEnumerationType(ctxt);
5883 if (*tree == NULL) return(0);
5884 return(XML_ATTRIBUTE_ENUMERATION);
5885}
5886
5887/**
5888 * xmlParseAttributeType:
5889 * @ctxt: an XML parser context
5890 * @tree: the enumeration tree built while parsing
5891 *
5892 * parse the Attribute list def for an element
5893 *
5894 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5895 *
5896 * [55] StringType ::= 'CDATA'
5897 *
5898 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5899 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5900 *
5901 * Validity constraints for attribute values syntax are checked in
5902 * xmlValidateAttributeValue()
5903 *
5904 * [ VC: ID ]
5905 * Values of type ID must match the Name production. A name must not
5906 * appear more than once in an XML document as a value of this type;
5907 * i.e., ID values must uniquely identify the elements which bear them.
5908 *
5909 * [ VC: One ID per Element Type ]
5910 * No element type may have more than one ID attribute specified.
5911 *
5912 * [ VC: ID Attribute Default ]
5913 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5914 *
5915 * [ VC: IDREF ]
5916 * Values of type IDREF must match the Name production, and values
5917 * of type IDREFS must match Names; each IDREF Name must match the value
5918 * of an ID attribute on some element in the XML document; i.e. IDREF
5919 * values must match the value of some ID attribute.
5920 *
5921 * [ VC: Entity Name ]
5922 * Values of type ENTITY must match the Name production, values
5923 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005924 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005925 *
5926 * [ VC: Name Token ]
5927 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005928 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005929 *
5930 * Returns the attribute type
5931 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005932int
Owen Taylor3473f882001-02-23 17:55:21 +00005933xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5934 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005935 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005936 SKIP(5);
5937 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005938 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005939 SKIP(6);
5940 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005941 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005942 SKIP(5);
5943 return(XML_ATTRIBUTE_IDREF);
5944 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5945 SKIP(2);
5946 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005947 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005948 SKIP(6);
5949 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005950 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005951 SKIP(8);
5952 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005953 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005954 SKIP(8);
5955 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005956 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005957 SKIP(7);
5958 return(XML_ATTRIBUTE_NMTOKEN);
5959 }
5960 return(xmlParseEnumeratedType(ctxt, tree));
5961}
5962
5963/**
5964 * xmlParseAttributeListDecl:
5965 * @ctxt: an XML parser context
5966 *
5967 * : parse the Attribute list def for an element
5968 *
5969 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5970 *
5971 * [53] AttDef ::= S Name S AttType S DefaultDecl
5972 *
5973 */
5974void
5975xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005976 const xmlChar *elemName;
5977 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005978 xmlEnumerationPtr tree;
5979
Daniel Veillarda07050d2003-10-19 14:46:32 +00005980 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005981 xmlParserInputPtr input = ctxt->input;
5982
5983 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005984 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005985 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005986 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005987 }
5988 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005989 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005990 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005991 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5992 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005993 return;
5994 }
5995 SKIP_BLANKS;
5996 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005997 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005998 const xmlChar *check = CUR_PTR;
5999 int type;
6000 int def;
6001 xmlChar *defaultValue = NULL;
6002
6003 GROW;
6004 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006005 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006006 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006007 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6008 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006009 break;
6010 }
6011 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006012 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006014 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006015 break;
6016 }
6017 SKIP_BLANKS;
6018
6019 type = xmlParseAttributeType(ctxt, &tree);
6020 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006021 break;
6022 }
6023
6024 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006025 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006026 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6027 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006028 if (tree != NULL)
6029 xmlFreeEnumeration(tree);
6030 break;
6031 }
6032 SKIP_BLANKS;
6033
6034 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6035 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006036 if (defaultValue != NULL)
6037 xmlFree(defaultValue);
6038 if (tree != NULL)
6039 xmlFreeEnumeration(tree);
6040 break;
6041 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006042 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6043 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006044
6045 GROW;
6046 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006047 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006048 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006049 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006050 if (defaultValue != NULL)
6051 xmlFree(defaultValue);
6052 if (tree != NULL)
6053 xmlFreeEnumeration(tree);
6054 break;
6055 }
6056 SKIP_BLANKS;
6057 }
6058 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006059 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6060 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006061 if (defaultValue != NULL)
6062 xmlFree(defaultValue);
6063 if (tree != NULL)
6064 xmlFreeEnumeration(tree);
6065 break;
6066 }
6067 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6068 (ctxt->sax->attributeDecl != NULL))
6069 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6070 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006071 else if (tree != NULL)
6072 xmlFreeEnumeration(tree);
6073
6074 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006075 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006076 (def != XML_ATTRIBUTE_REQUIRED)) {
6077 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6078 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006079 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006080 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6081 }
Owen Taylor3473f882001-02-23 17:55:21 +00006082 if (defaultValue != NULL)
6083 xmlFree(defaultValue);
6084 GROW;
6085 }
6086 if (RAW == '>') {
6087 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006088 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6089 "Attribute list declaration doesn't start and stop in the same entity\n",
6090 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006091 }
6092 NEXT;
6093 }
Owen Taylor3473f882001-02-23 17:55:21 +00006094 }
6095}
6096
6097/**
6098 * xmlParseElementMixedContentDecl:
6099 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006100 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006101 *
6102 * parse the declaration for a Mixed Element content
6103 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006104 *
Owen Taylor3473f882001-02-23 17:55:21 +00006105 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6106 * '(' S? '#PCDATA' S? ')'
6107 *
6108 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6109 *
6110 * [ VC: No Duplicate Types ]
6111 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006112 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006113 *
6114 * returns: the list of the xmlElementContentPtr describing the element choices
6115 */
6116xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006117xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006118 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006119 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006120
6121 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006122 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006123 SKIP(7);
6124 SKIP_BLANKS;
6125 SHRINK;
6126 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006127 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006128 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6129"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006130 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006131 }
Owen Taylor3473f882001-02-23 17:55:21 +00006132 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006133 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006134 if (ret == NULL)
6135 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006136 if (RAW == '*') {
6137 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6138 NEXT;
6139 }
6140 return(ret);
6141 }
6142 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006143 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006144 if (ret == NULL) return(NULL);
6145 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006146 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006147 NEXT;
6148 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006149 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006150 if (ret == NULL) return(NULL);
6151 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006152 if (cur != NULL)
6153 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006154 cur = ret;
6155 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006156 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006157 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006158 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006159 if (n->c1 != NULL)
6160 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006161 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006162 if (n != NULL)
6163 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006164 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006165 }
6166 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006167 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006168 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006169 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006170 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006171 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 return(NULL);
6173 }
6174 SKIP_BLANKS;
6175 GROW;
6176 }
6177 if ((RAW == ')') && (NXT(1) == '*')) {
6178 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006179 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006180 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006181 if (cur->c2 != NULL)
6182 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006183 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006184 if (ret != NULL)
6185 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006186 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006187 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6188"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006189 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006190 }
Owen Taylor3473f882001-02-23 17:55:21 +00006191 SKIP(2);
6192 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006193 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006194 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006195 return(NULL);
6196 }
6197
6198 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006199 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006200 }
6201 return(ret);
6202}
6203
6204/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006205 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006206 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006207 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006208 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006209 *
6210 * parse the declaration for a Mixed Element content
6211 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006212 *
Owen Taylor3473f882001-02-23 17:55:21 +00006213 *
6214 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6215 *
6216 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6217 *
6218 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6219 *
6220 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6221 *
6222 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6223 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006224 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006225 * opening or closing parentheses in a choice, seq, or Mixed
6226 * construct is contained in the replacement text for a parameter
6227 * entity, both must be contained in the same replacement text. For
6228 * interoperability, if a parameter-entity reference appears in a
6229 * choice, seq, or Mixed construct, its replacement text should not
6230 * be empty, and neither the first nor last non-blank character of
6231 * the replacement text should be a connector (| or ,).
6232 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006233 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006234 * hierarchy.
6235 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006236static xmlElementContentPtr
6237xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6238 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006239 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006240 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006241 xmlChar type = 0;
6242
Daniel Veillard489f9672009-08-10 16:49:30 +02006243 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6244 (depth > 2048)) {
6245 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6246"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6247 depth);
6248 return(NULL);
6249 }
Owen Taylor3473f882001-02-23 17:55:21 +00006250 SKIP_BLANKS;
6251 GROW;
6252 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006253 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006254
Owen Taylor3473f882001-02-23 17:55:21 +00006255 /* Recurse on first child */
6256 NEXT;
6257 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006258 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6259 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006260 SKIP_BLANKS;
6261 GROW;
6262 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006263 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006264 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006265 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006266 return(NULL);
6267 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006268 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006269 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006270 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006271 return(NULL);
6272 }
Owen Taylor3473f882001-02-23 17:55:21 +00006273 GROW;
6274 if (RAW == '?') {
6275 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6276 NEXT;
6277 } else if (RAW == '*') {
6278 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6279 NEXT;
6280 } else if (RAW == '+') {
6281 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6282 NEXT;
6283 } else {
6284 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6285 }
Owen Taylor3473f882001-02-23 17:55:21 +00006286 GROW;
6287 }
6288 SKIP_BLANKS;
6289 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006290 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006291 /*
6292 * Each loop we parse one separator and one element.
6293 */
6294 if (RAW == ',') {
6295 if (type == 0) type = CUR;
6296
6297 /*
6298 * Detect "Name | Name , Name" error
6299 */
6300 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006301 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006302 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006303 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006304 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006305 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006306 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006307 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006308 return(NULL);
6309 }
6310 NEXT;
6311
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006312 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006313 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006314 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006315 xmlFreeDocElementContent(ctxt->myDoc, last);
6316 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006317 return(NULL);
6318 }
6319 if (last == NULL) {
6320 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006321 if (ret != NULL)
6322 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006323 ret = cur = op;
6324 } else {
6325 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006326 if (op != NULL)
6327 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006328 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006329 if (last != NULL)
6330 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006331 cur =op;
6332 last = NULL;
6333 }
6334 } else if (RAW == '|') {
6335 if (type == 0) type = CUR;
6336
6337 /*
6338 * Detect "Name , Name | Name" error
6339 */
6340 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006341 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006342 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006343 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006344 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006345 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006346 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006347 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006348 return(NULL);
6349 }
6350 NEXT;
6351
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006352 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006353 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006354 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006355 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006356 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006357 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006358 return(NULL);
6359 }
6360 if (last == NULL) {
6361 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006362 if (ret != NULL)
6363 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006364 ret = cur = op;
6365 } else {
6366 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006367 if (op != NULL)
6368 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006369 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006370 if (last != NULL)
6371 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006372 cur =op;
6373 last = NULL;
6374 }
6375 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006376 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006377 if ((last != NULL) && (last != ret))
6378 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006379 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006380 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006381 return(NULL);
6382 }
6383 GROW;
6384 SKIP_BLANKS;
6385 GROW;
6386 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006387 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006388 /* Recurse on second child */
6389 NEXT;
6390 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006391 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6392 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006393 SKIP_BLANKS;
6394 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006395 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006396 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006397 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006398 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006399 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006400 return(NULL);
6401 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006402 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006403 if (last == NULL) {
6404 if (ret != NULL)
6405 xmlFreeDocElementContent(ctxt->myDoc, ret);
6406 return(NULL);
6407 }
Owen Taylor3473f882001-02-23 17:55:21 +00006408 if (RAW == '?') {
6409 last->ocur = XML_ELEMENT_CONTENT_OPT;
6410 NEXT;
6411 } else if (RAW == '*') {
6412 last->ocur = XML_ELEMENT_CONTENT_MULT;
6413 NEXT;
6414 } else if (RAW == '+') {
6415 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6416 NEXT;
6417 } else {
6418 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6419 }
6420 }
6421 SKIP_BLANKS;
6422 GROW;
6423 }
6424 if ((cur != NULL) && (last != NULL)) {
6425 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006426 if (last != NULL)
6427 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006428 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006429 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006430 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6431"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006432 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006433 }
Owen Taylor3473f882001-02-23 17:55:21 +00006434 NEXT;
6435 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006436 if (ret != NULL) {
6437 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6438 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6440 else
6441 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6442 }
Owen Taylor3473f882001-02-23 17:55:21 +00006443 NEXT;
6444 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006445 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006446 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006447 cur = ret;
6448 /*
6449 * Some normalization:
6450 * (a | b* | c?)* == (a | b | c)*
6451 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006452 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006453 if ((cur->c1 != NULL) &&
6454 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6456 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6457 if ((cur->c2 != NULL) &&
6458 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6459 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6460 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6461 cur = cur->c2;
6462 }
6463 }
Owen Taylor3473f882001-02-23 17:55:21 +00006464 NEXT;
6465 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006466 if (ret != NULL) {
6467 int found = 0;
6468
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006469 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6470 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6471 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006472 else
6473 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006474 /*
6475 * Some normalization:
6476 * (a | b*)+ == (a | b)*
6477 * (a | b?)+ == (a | b)*
6478 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006479 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006480 if ((cur->c1 != NULL) &&
6481 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6484 found = 1;
6485 }
6486 if ((cur->c2 != NULL) &&
6487 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6490 found = 1;
6491 }
6492 cur = cur->c2;
6493 }
6494 if (found)
6495 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6496 }
Owen Taylor3473f882001-02-23 17:55:21 +00006497 NEXT;
6498 }
6499 return(ret);
6500}
6501
6502/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006503 * xmlParseElementChildrenContentDecl:
6504 * @ctxt: an XML parser context
6505 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006506 *
6507 * parse the declaration for a Mixed Element content
6508 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6509 *
6510 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6511 *
6512 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6513 *
6514 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6515 *
6516 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6517 *
6518 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6519 * TODO Parameter-entity replacement text must be properly nested
6520 * with parenthesized groups. That is to say, if either of the
6521 * opening or closing parentheses in a choice, seq, or Mixed
6522 * construct is contained in the replacement text for a parameter
6523 * entity, both must be contained in the same replacement text. For
6524 * interoperability, if a parameter-entity reference appears in a
6525 * choice, seq, or Mixed construct, its replacement text should not
6526 * be empty, and neither the first nor last non-blank character of
6527 * the replacement text should be a connector (| or ,).
6528 *
6529 * Returns the tree of xmlElementContentPtr describing the element
6530 * hierarchy.
6531 */
6532xmlElementContentPtr
6533xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6534 /* stub left for API/ABI compat */
6535 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6536}
6537
6538/**
Owen Taylor3473f882001-02-23 17:55:21 +00006539 * xmlParseElementContentDecl:
6540 * @ctxt: an XML parser context
6541 * @name: the name of the element being defined.
6542 * @result: the Element Content pointer will be stored here if any
6543 *
6544 * parse the declaration for an Element content either Mixed or Children,
6545 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006546 *
Owen Taylor3473f882001-02-23 17:55:21 +00006547 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6548 *
6549 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6550 */
6551
6552int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006553xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006554 xmlElementContentPtr *result) {
6555
6556 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006557 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006558 int res;
6559
6560 *result = NULL;
6561
6562 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006563 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006564 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006565 return(-1);
6566 }
6567 NEXT;
6568 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006569 if (ctxt->instate == XML_PARSER_EOF)
6570 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006571 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006572 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006573 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006574 res = XML_ELEMENT_TYPE_MIXED;
6575 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006576 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 res = XML_ELEMENT_TYPE_ELEMENT;
6578 }
Owen Taylor3473f882001-02-23 17:55:21 +00006579 SKIP_BLANKS;
6580 *result = tree;
6581 return(res);
6582}
6583
6584/**
6585 * xmlParseElementDecl:
6586 * @ctxt: an XML parser context
6587 *
6588 * parse an Element declaration.
6589 *
6590 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6591 *
6592 * [ VC: Unique Element Type Declaration ]
6593 * No element type may be declared more than once
6594 *
6595 * Returns the type of the element, or -1 in case of error
6596 */
6597int
6598xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006599 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006600 int ret = -1;
6601 xmlElementContentPtr content = NULL;
6602
Daniel Veillard4c778d82005-01-23 17:37:44 +00006603 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006604 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006605 xmlParserInputPtr input = ctxt->input;
6606
6607 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006608 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006609 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6610 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006611 }
6612 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006613 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006614 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006615 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6616 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006617 return(-1);
6618 }
6619 while ((RAW == 0) && (ctxt->inputNr > 1))
6620 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006621 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006622 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6623 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006624 }
6625 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006626 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006627 SKIP(5);
6628 /*
6629 * Element must always be empty.
6630 */
6631 ret = XML_ELEMENT_TYPE_EMPTY;
6632 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6633 (NXT(2) == 'Y')) {
6634 SKIP(3);
6635 /*
6636 * Element is a generic container.
6637 */
6638 ret = XML_ELEMENT_TYPE_ANY;
6639 } else if (RAW == '(') {
6640 ret = xmlParseElementContentDecl(ctxt, name, &content);
6641 } else {
6642 /*
6643 * [ WFC: PEs in Internal Subset ] error handling.
6644 */
6645 if ((RAW == '%') && (ctxt->external == 0) &&
6646 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006647 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006648 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006649 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006650 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006651 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6652 }
Owen Taylor3473f882001-02-23 17:55:21 +00006653 return(-1);
6654 }
6655
6656 SKIP_BLANKS;
6657 /*
6658 * Pop-up of finished entities.
6659 */
6660 while ((RAW == 0) && (ctxt->inputNr > 1))
6661 xmlPopInput(ctxt);
6662 SKIP_BLANKS;
6663
6664 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006665 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006666 if (content != NULL) {
6667 xmlFreeDocElementContent(ctxt->myDoc, content);
6668 }
Owen Taylor3473f882001-02-23 17:55:21 +00006669 } else {
6670 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006671 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6672 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006673 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006674
Owen Taylor3473f882001-02-23 17:55:21 +00006675 NEXT;
6676 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006677 (ctxt->sax->elementDecl != NULL)) {
6678 if (content != NULL)
6679 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006680 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6681 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006682 if ((content != NULL) && (content->parent == NULL)) {
6683 /*
6684 * this is a trick: if xmlAddElementDecl is called,
6685 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006686 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006687 * interfaces or change the API/ABI
6688 */
6689 xmlFreeDocElementContent(ctxt->myDoc, content);
6690 }
6691 } else if (content != NULL) {
6692 xmlFreeDocElementContent(ctxt->myDoc, content);
6693 }
Owen Taylor3473f882001-02-23 17:55:21 +00006694 }
Owen Taylor3473f882001-02-23 17:55:21 +00006695 }
6696 return(ret);
6697}
6698
6699/**
Owen Taylor3473f882001-02-23 17:55:21 +00006700 * xmlParseConditionalSections
6701 * @ctxt: an XML parser context
6702 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006703 * [61] conditionalSect ::= includeSect | ignoreSect
6704 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006705 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6706 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6707 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6708 */
6709
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006710static void
Owen Taylor3473f882001-02-23 17:55:21 +00006711xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006712 int id = ctxt->input->id;
6713
Owen Taylor3473f882001-02-23 17:55:21 +00006714 SKIP(3);
6715 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006716 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006717 SKIP(7);
6718 SKIP_BLANKS;
6719 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006720 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006721 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006722 if (ctxt->input->id != id) {
6723 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6724 "All markup of the conditional section is not in the same entity\n",
6725 NULL, NULL);
6726 }
Owen Taylor3473f882001-02-23 17:55:21 +00006727 NEXT;
6728 }
6729 if (xmlParserDebugEntities) {
6730 if ((ctxt->input != NULL) && (ctxt->input->filename))
6731 xmlGenericError(xmlGenericErrorContext,
6732 "%s(%d): ", ctxt->input->filename,
6733 ctxt->input->line);
6734 xmlGenericError(xmlGenericErrorContext,
6735 "Entering INCLUDE Conditional Section\n");
6736 }
6737
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006738 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6739 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006740 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006741 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006742
6743 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6744 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006745 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006746 NEXT;
6747 } else if (RAW == '%') {
6748 xmlParsePEReference(ctxt);
6749 } else
6750 xmlParseMarkupDecl(ctxt);
6751
6752 /*
6753 * Pop-up of finished entities.
6754 */
6755 while ((RAW == 0) && (ctxt->inputNr > 1))
6756 xmlPopInput(ctxt);
6757
Daniel Veillardfdc91562002-07-01 21:52:03 +00006758 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006759 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006760 break;
6761 }
6762 }
6763 if (xmlParserDebugEntities) {
6764 if ((ctxt->input != NULL) && (ctxt->input->filename))
6765 xmlGenericError(xmlGenericErrorContext,
6766 "%s(%d): ", ctxt->input->filename,
6767 ctxt->input->line);
6768 xmlGenericError(xmlGenericErrorContext,
6769 "Leaving INCLUDE Conditional Section\n");
6770 }
6771
Daniel Veillarda07050d2003-10-19 14:46:32 +00006772 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006773 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006774 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006775 int depth = 0;
6776
6777 SKIP(6);
6778 SKIP_BLANKS;
6779 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006780 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006781 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006782 if (ctxt->input->id != id) {
6783 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6784 "All markup of the conditional section is not in the same entity\n",
6785 NULL, NULL);
6786 }
Owen Taylor3473f882001-02-23 17:55:21 +00006787 NEXT;
6788 }
6789 if (xmlParserDebugEntities) {
6790 if ((ctxt->input != NULL) && (ctxt->input->filename))
6791 xmlGenericError(xmlGenericErrorContext,
6792 "%s(%d): ", ctxt->input->filename,
6793 ctxt->input->line);
6794 xmlGenericError(xmlGenericErrorContext,
6795 "Entering IGNORE Conditional Section\n");
6796 }
6797
6798 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006799 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006800 * But disable SAX event generating DTD building in the meantime
6801 */
6802 state = ctxt->disableSAX;
6803 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006804 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006805 ctxt->instate = XML_PARSER_IGNORE;
6806
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006807 while (((depth >= 0) && (RAW != 0)) &&
6808 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006809 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6810 depth++;
6811 SKIP(3);
6812 continue;
6813 }
6814 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6815 if (--depth >= 0) SKIP(3);
6816 continue;
6817 }
6818 NEXT;
6819 continue;
6820 }
6821
6822 ctxt->disableSAX = state;
6823 ctxt->instate = instate;
6824
6825 if (xmlParserDebugEntities) {
6826 if ((ctxt->input != NULL) && (ctxt->input->filename))
6827 xmlGenericError(xmlGenericErrorContext,
6828 "%s(%d): ", ctxt->input->filename,
6829 ctxt->input->line);
6830 xmlGenericError(xmlGenericErrorContext,
6831 "Leaving IGNORE Conditional Section\n");
6832 }
6833
6834 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006835 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006836 }
6837
6838 if (RAW == 0)
6839 SHRINK;
6840
6841 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006842 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006843 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006844 if (ctxt->input->id != id) {
6845 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6846 "All markup of the conditional section is not in the same entity\n",
6847 NULL, NULL);
6848 }
Owen Taylor3473f882001-02-23 17:55:21 +00006849 SKIP(3);
6850 }
6851}
6852
6853/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006854 * xmlParseMarkupDecl:
6855 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006856 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006857 * parse Markup declarations
6858 *
6859 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6860 * NotationDecl | PI | Comment
6861 *
6862 * [ VC: Proper Declaration/PE Nesting ]
6863 * Parameter-entity replacement text must be properly nested with
6864 * markup declarations. That is to say, if either the first character
6865 * or the last character of a markup declaration (markupdecl above) is
6866 * contained in the replacement text for a parameter-entity reference,
6867 * both must be contained in the same replacement text.
6868 *
6869 * [ WFC: PEs in Internal Subset ]
6870 * In the internal DTD subset, parameter-entity references can occur
6871 * only where markup declarations can occur, not within markup declarations.
6872 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006873 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006874 */
6875void
6876xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6877 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006878 if (CUR == '<') {
6879 if (NXT(1) == '!') {
6880 switch (NXT(2)) {
6881 case 'E':
6882 if (NXT(3) == 'L')
6883 xmlParseElementDecl(ctxt);
6884 else if (NXT(3) == 'N')
6885 xmlParseEntityDecl(ctxt);
6886 break;
6887 case 'A':
6888 xmlParseAttributeListDecl(ctxt);
6889 break;
6890 case 'N':
6891 xmlParseNotationDecl(ctxt);
6892 break;
6893 case '-':
6894 xmlParseComment(ctxt);
6895 break;
6896 default:
6897 /* there is an error but it will be detected later */
6898 break;
6899 }
6900 } else if (NXT(1) == '?') {
6901 xmlParsePI(ctxt);
6902 }
6903 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006904 /*
6905 * This is only for internal subset. On external entities,
6906 * the replacement is done before parsing stage
6907 */
6908 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6909 xmlParsePEReference(ctxt);
6910
6911 /*
6912 * Conditional sections are allowed from entities included
6913 * by PE References in the internal subset.
6914 */
6915 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6916 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6917 xmlParseConditionalSections(ctxt);
6918 }
6919 }
6920
6921 ctxt->instate = XML_PARSER_DTD;
6922}
6923
6924/**
6925 * xmlParseTextDecl:
6926 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006927 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006928 * parse an XML declaration header for external entities
6929 *
6930 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006931 */
6932
6933void
6934xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6935 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006936 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006937
6938 /*
6939 * We know that '<?xml' is here.
6940 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006941 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006942 SKIP(5);
6943 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006944 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006945 return;
6946 }
6947
William M. Brack76e95df2003-10-18 16:20:14 +00006948 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006949 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6950 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006951 }
6952 SKIP_BLANKS;
6953
6954 /*
6955 * We may have the VersionInfo here.
6956 */
6957 version = xmlParseVersionInfo(ctxt);
6958 if (version == NULL)
6959 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006960 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006961 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006962 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6963 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006964 }
6965 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006966 ctxt->input->version = version;
6967
6968 /*
6969 * We must have the encoding declaration
6970 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006971 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006972 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6973 /*
6974 * The XML REC instructs us to stop parsing right here
6975 */
6976 return;
6977 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006978 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6979 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6980 "Missing encoding in text declaration\n");
6981 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006982
6983 SKIP_BLANKS;
6984 if ((RAW == '?') && (NXT(1) == '>')) {
6985 SKIP(2);
6986 } else if (RAW == '>') {
6987 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006988 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006989 NEXT;
6990 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006991 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006992 MOVETO_ENDTAG(CUR_PTR);
6993 NEXT;
6994 }
6995}
6996
6997/**
Owen Taylor3473f882001-02-23 17:55:21 +00006998 * xmlParseExternalSubset:
6999 * @ctxt: an XML parser context
7000 * @ExternalID: the external identifier
7001 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007002 *
Owen Taylor3473f882001-02-23 17:55:21 +00007003 * parse Markup declarations from an external subset
7004 *
7005 * [30] extSubset ::= textDecl? extSubsetDecl
7006 *
7007 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7008 */
7009void
7010xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7011 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007012 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007013 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007014
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007015 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007016 (ctxt->input->end - ctxt->input->cur >= 4)) {
7017 xmlChar start[4];
7018 xmlCharEncoding enc;
7019
7020 start[0] = RAW;
7021 start[1] = NXT(1);
7022 start[2] = NXT(2);
7023 start[3] = NXT(3);
7024 enc = xmlDetectCharEncoding(start, 4);
7025 if (enc != XML_CHAR_ENCODING_NONE)
7026 xmlSwitchEncoding(ctxt, enc);
7027 }
7028
Daniel Veillarda07050d2003-10-19 14:46:32 +00007029 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007030 xmlParseTextDecl(ctxt);
7031 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7032 /*
7033 * The XML REC instructs us to stop parsing right here
7034 */
7035 ctxt->instate = XML_PARSER_EOF;
7036 return;
7037 }
7038 }
7039 if (ctxt->myDoc == NULL) {
7040 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007041 if (ctxt->myDoc == NULL) {
7042 xmlErrMemory(ctxt, "New Doc failed");
7043 return;
7044 }
7045 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007046 }
7047 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7048 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7049
7050 ctxt->instate = XML_PARSER_DTD;
7051 ctxt->external = 1;
7052 while (((RAW == '<') && (NXT(1) == '?')) ||
7053 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007054 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007055 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007056 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007057
7058 GROW;
7059 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7060 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007061 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007062 NEXT;
7063 } else if (RAW == '%') {
7064 xmlParsePEReference(ctxt);
7065 } else
7066 xmlParseMarkupDecl(ctxt);
7067
7068 /*
7069 * Pop-up of finished entities.
7070 */
7071 while ((RAW == 0) && (ctxt->inputNr > 1))
7072 xmlPopInput(ctxt);
7073
Daniel Veillardfdc91562002-07-01 21:52:03 +00007074 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007075 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007076 break;
7077 }
7078 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007079
Owen Taylor3473f882001-02-23 17:55:21 +00007080 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007081 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007082 }
7083
7084}
7085
7086/**
7087 * xmlParseReference:
7088 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007089 *
Owen Taylor3473f882001-02-23 17:55:21 +00007090 * parse and handle entity references in content, depending on the SAX
7091 * interface, this may end-up in a call to character() if this is a
7092 * CharRef, a predefined entity, if there is no reference() callback.
7093 * or if the parser was asked to switch to that mode.
7094 *
7095 * [67] Reference ::= EntityRef | CharRef
7096 */
7097void
7098xmlParseReference(xmlParserCtxtPtr ctxt) {
7099 xmlEntityPtr ent;
7100 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007101 int was_checked;
7102 xmlNodePtr list = NULL;
7103 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007104
Daniel Veillard0161e632008-08-28 15:36:32 +00007105
7106 if (RAW != '&')
7107 return;
7108
7109 /*
7110 * Simple case of a CharRef
7111 */
Owen Taylor3473f882001-02-23 17:55:21 +00007112 if (NXT(1) == '#') {
7113 int i = 0;
7114 xmlChar out[10];
7115 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007116 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007117
Daniel Veillarddc171602008-03-26 17:41:38 +00007118 if (value == 0)
7119 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007120 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7121 /*
7122 * So we are using non-UTF-8 buffers
7123 * Check that the char fit on 8bits, if not
7124 * generate a CharRef.
7125 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007126 if (value <= 0xFF) {
7127 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007128 out[1] = 0;
7129 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7130 (!ctxt->disableSAX))
7131 ctxt->sax->characters(ctxt->userData, out, 1);
7132 } else {
7133 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007134 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007135 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007136 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007137 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7138 (!ctxt->disableSAX))
7139 ctxt->sax->reference(ctxt->userData, out);
7140 }
7141 } else {
7142 /*
7143 * Just encode the value in UTF-8
7144 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007145 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007146 out[i] = 0;
7147 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7148 (!ctxt->disableSAX))
7149 ctxt->sax->characters(ctxt->userData, out, i);
7150 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007151 return;
7152 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007153
Daniel Veillard0161e632008-08-28 15:36:32 +00007154 /*
7155 * We are seeing an entity reference
7156 */
7157 ent = xmlParseEntityRef(ctxt);
7158 if (ent == NULL) return;
7159 if (!ctxt->wellFormed)
7160 return;
7161 was_checked = ent->checked;
7162
7163 /* special case of predefined entities */
7164 if ((ent->name == NULL) ||
7165 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7166 val = ent->content;
7167 if (val == NULL) return;
7168 /*
7169 * inline the entity.
7170 */
7171 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7172 (!ctxt->disableSAX))
7173 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7174 return;
7175 }
7176
7177 /*
7178 * The first reference to the entity trigger a parsing phase
7179 * where the ent->children is filled with the result from
7180 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007181 * Note: external parsed entities will not be loaded, it is not
7182 * required for a non-validating parser, unless the parsing option
7183 * of validating, or substituting entities were given. Doing so is
7184 * far more secure as the parser will only process data coming from
7185 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007186 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007187 if ((ent->checked == 0) &&
7188 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7189 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007190 unsigned long oldnbent = ctxt->nbentities;
7191
7192 /*
7193 * This is a bit hackish but this seems the best
7194 * way to make sure both SAX and DOM entity support
7195 * behaves okay.
7196 */
7197 void *user_data;
7198 if (ctxt->userData == ctxt)
7199 user_data = NULL;
7200 else
7201 user_data = ctxt->userData;
7202
7203 /*
7204 * Check that this entity is well formed
7205 * 4.3.2: An internal general parsed entity is well-formed
7206 * if its replacement text matches the production labeled
7207 * content.
7208 */
7209 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7210 ctxt->depth++;
7211 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7212 user_data, &list);
7213 ctxt->depth--;
7214
7215 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7216 ctxt->depth++;
7217 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7218 user_data, ctxt->depth, ent->URI,
7219 ent->ExternalID, &list);
7220 ctxt->depth--;
7221 } else {
7222 ret = XML_ERR_ENTITY_PE_INTERNAL;
7223 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7224 "invalid entity type found\n", NULL);
7225 }
7226
7227 /*
7228 * Store the number of entities needing parsing for this entity
7229 * content and do checkings
7230 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007231 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7232 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7233 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007234 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007235 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007236 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007237 return;
7238 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007239 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007240 xmlFreeNodeList(list);
7241 return;
7242 }
Owen Taylor3473f882001-02-23 17:55:21 +00007243
Daniel Veillard0161e632008-08-28 15:36:32 +00007244 if ((ret == XML_ERR_OK) && (list != NULL)) {
7245 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7246 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7247 (ent->children == NULL)) {
7248 ent->children = list;
7249 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007250 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007251 * Prune it directly in the generated document
7252 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007253 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007254 if (((list->type == XML_TEXT_NODE) &&
7255 (list->next == NULL)) ||
7256 (ctxt->parseMode == XML_PARSE_READER)) {
7257 list->parent = (xmlNodePtr) ent;
7258 list = NULL;
7259 ent->owner = 1;
7260 } else {
7261 ent->owner = 0;
7262 while (list != NULL) {
7263 list->parent = (xmlNodePtr) ctxt->node;
7264 list->doc = ctxt->myDoc;
7265 if (list->next == NULL)
7266 ent->last = list;
7267 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007268 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007269 list = ent->children;
7270#ifdef LIBXML_LEGACY_ENABLED
7271 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7272 xmlAddEntityReference(ent, list, NULL);
7273#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007274 }
7275 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007276 ent->owner = 1;
7277 while (list != NULL) {
7278 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007279 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007280 if (list->next == NULL)
7281 ent->last = list;
7282 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007283 }
7284 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007285 } else {
7286 xmlFreeNodeList(list);
7287 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007288 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007289 } else if ((ret != XML_ERR_OK) &&
7290 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7291 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7292 "Entity '%s' failed to parse\n", ent->name);
7293 } else if (list != NULL) {
7294 xmlFreeNodeList(list);
7295 list = NULL;
7296 }
7297 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007298 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007299 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007300 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007301 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007302
Daniel Veillard0161e632008-08-28 15:36:32 +00007303 /*
7304 * Now that the entity content has been gathered
7305 * provide it to the application, this can take different forms based
7306 * on the parsing modes.
7307 */
7308 if (ent->children == NULL) {
7309 /*
7310 * Probably running in SAX mode and the callbacks don't
7311 * build the entity content. So unless we already went
7312 * though parsing for first checking go though the entity
7313 * content to generate callbacks associated to the entity
7314 */
7315 if (was_checked != 0) {
7316 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007317 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007318 * This is a bit hackish but this seems the best
7319 * way to make sure both SAX and DOM entity support
7320 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007321 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007322 if (ctxt->userData == ctxt)
7323 user_data = NULL;
7324 else
7325 user_data = ctxt->userData;
7326
7327 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7328 ctxt->depth++;
7329 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7330 ent->content, user_data, NULL);
7331 ctxt->depth--;
7332 } else if (ent->etype ==
7333 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7334 ctxt->depth++;
7335 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7336 ctxt->sax, user_data, ctxt->depth,
7337 ent->URI, ent->ExternalID, NULL);
7338 ctxt->depth--;
7339 } else {
7340 ret = XML_ERR_ENTITY_PE_INTERNAL;
7341 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7342 "invalid entity type found\n", NULL);
7343 }
7344 if (ret == XML_ERR_ENTITY_LOOP) {
7345 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7346 return;
7347 }
7348 }
7349 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7350 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7351 /*
7352 * Entity reference callback comes second, it's somewhat
7353 * superfluous but a compatibility to historical behaviour
7354 */
7355 ctxt->sax->reference(ctxt->userData, ent->name);
7356 }
7357 return;
7358 }
7359
7360 /*
7361 * If we didn't get any children for the entity being built
7362 */
7363 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7364 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7365 /*
7366 * Create a node.
7367 */
7368 ctxt->sax->reference(ctxt->userData, ent->name);
7369 return;
7370 }
7371
7372 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7373 /*
7374 * There is a problem on the handling of _private for entities
7375 * (bug 155816): Should we copy the content of the field from
7376 * the entity (possibly overwriting some value set by the user
7377 * when a copy is created), should we leave it alone, or should
7378 * we try to take care of different situations? The problem
7379 * is exacerbated by the usage of this field by the xmlReader.
7380 * To fix this bug, we look at _private on the created node
7381 * and, if it's NULL, we copy in whatever was in the entity.
7382 * If it's not NULL we leave it alone. This is somewhat of a
7383 * hack - maybe we should have further tests to determine
7384 * what to do.
7385 */
7386 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7387 /*
7388 * Seems we are generating the DOM content, do
7389 * a simple tree copy for all references except the first
7390 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007391 */
7392 if (((list == NULL) && (ent->owner == 0)) ||
7393 (ctxt->parseMode == XML_PARSE_READER)) {
7394 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7395
7396 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007397 * We are copying here, make sure there is no abuse
7398 */
7399 ctxt->sizeentcopy += ent->length;
7400 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7401 return;
7402
7403 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007404 * when operating on a reader, the entities definitions
7405 * are always owning the entities subtree.
7406 if (ctxt->parseMode == XML_PARSE_READER)
7407 ent->owner = 1;
7408 */
7409
7410 cur = ent->children;
7411 while (cur != NULL) {
7412 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7413 if (nw != NULL) {
7414 if (nw->_private == NULL)
7415 nw->_private = cur->_private;
7416 if (firstChild == NULL){
7417 firstChild = nw;
7418 }
7419 nw = xmlAddChild(ctxt->node, nw);
7420 }
7421 if (cur == ent->last) {
7422 /*
7423 * needed to detect some strange empty
7424 * node cases in the reader tests
7425 */
7426 if ((ctxt->parseMode == XML_PARSE_READER) &&
7427 (nw != NULL) &&
7428 (nw->type == XML_ELEMENT_NODE) &&
7429 (nw->children == NULL))
7430 nw->extra = 1;
7431
7432 break;
7433 }
7434 cur = cur->next;
7435 }
7436#ifdef LIBXML_LEGACY_ENABLED
7437 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7438 xmlAddEntityReference(ent, firstChild, nw);
7439#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007440 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007441 xmlNodePtr nw = NULL, cur, next, last,
7442 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007443
7444 /*
7445 * We are copying here, make sure there is no abuse
7446 */
7447 ctxt->sizeentcopy += ent->length;
7448 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7449 return;
7450
Daniel Veillard0161e632008-08-28 15:36:32 +00007451 /*
7452 * Copy the entity child list and make it the new
7453 * entity child list. The goal is to make sure any
7454 * ID or REF referenced will be the one from the
7455 * document content and not the entity copy.
7456 */
7457 cur = ent->children;
7458 ent->children = NULL;
7459 last = ent->last;
7460 ent->last = NULL;
7461 while (cur != NULL) {
7462 next = cur->next;
7463 cur->next = NULL;
7464 cur->parent = NULL;
7465 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7466 if (nw != NULL) {
7467 if (nw->_private == NULL)
7468 nw->_private = cur->_private;
7469 if (firstChild == NULL){
7470 firstChild = cur;
7471 }
7472 xmlAddChild((xmlNodePtr) ent, nw);
7473 xmlAddChild(ctxt->node, cur);
7474 }
7475 if (cur == last)
7476 break;
7477 cur = next;
7478 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007479 if (ent->owner == 0)
7480 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007481#ifdef LIBXML_LEGACY_ENABLED
7482 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7483 xmlAddEntityReference(ent, firstChild, nw);
7484#endif /* LIBXML_LEGACY_ENABLED */
7485 } else {
7486 const xmlChar *nbktext;
7487
7488 /*
7489 * the name change is to avoid coalescing of the
7490 * node with a possible previous text one which
7491 * would make ent->children a dangling pointer
7492 */
7493 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7494 -1);
7495 if (ent->children->type == XML_TEXT_NODE)
7496 ent->children->name = nbktext;
7497 if ((ent->last != ent->children) &&
7498 (ent->last->type == XML_TEXT_NODE))
7499 ent->last->name = nbktext;
7500 xmlAddChildList(ctxt->node, ent->children);
7501 }
7502
7503 /*
7504 * This is to avoid a nasty side effect, see
7505 * characters() in SAX.c
7506 */
7507 ctxt->nodemem = 0;
7508 ctxt->nodelen = 0;
7509 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007510 }
7511 }
7512}
7513
7514/**
7515 * xmlParseEntityRef:
7516 * @ctxt: an XML parser context
7517 *
7518 * parse ENTITY references declarations
7519 *
7520 * [68] EntityRef ::= '&' Name ';'
7521 *
7522 * [ WFC: Entity Declared ]
7523 * In a document without any DTD, a document with only an internal DTD
7524 * subset which contains no parameter entity references, or a document
7525 * with "standalone='yes'", the Name given in the entity reference
7526 * must match that in an entity declaration, except that well-formed
7527 * documents need not declare any of the following entities: amp, lt,
7528 * gt, apos, quot. The declaration of a parameter entity must precede
7529 * any reference to it. Similarly, the declaration of a general entity
7530 * must precede any reference to it which appears in a default value in an
7531 * attribute-list declaration. Note that if entities are declared in the
7532 * external subset or in external parameter entities, a non-validating
7533 * processor is not obligated to read and process their declarations;
7534 * for such documents, the rule that an entity must be declared is a
7535 * well-formedness constraint only if standalone='yes'.
7536 *
7537 * [ WFC: Parsed Entity ]
7538 * An entity reference must not contain the name of an unparsed entity
7539 *
7540 * Returns the xmlEntityPtr if found, or NULL otherwise.
7541 */
7542xmlEntityPtr
7543xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007544 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007545 xmlEntityPtr ent = NULL;
7546
7547 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007548 if (ctxt->instate == XML_PARSER_EOF)
7549 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007550
Daniel Veillard0161e632008-08-28 15:36:32 +00007551 if (RAW != '&')
7552 return(NULL);
7553 NEXT;
7554 name = xmlParseName(ctxt);
7555 if (name == NULL) {
7556 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7557 "xmlParseEntityRef: no name\n");
7558 return(NULL);
7559 }
7560 if (RAW != ';') {
7561 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7562 return(NULL);
7563 }
7564 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007565
Daniel Veillard0161e632008-08-28 15:36:32 +00007566 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007567 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007568 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007569 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7570 ent = xmlGetPredefinedEntity(name);
7571 if (ent != NULL)
7572 return(ent);
7573 }
Owen Taylor3473f882001-02-23 17:55:21 +00007574
Daniel Veillard0161e632008-08-28 15:36:32 +00007575 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007576 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007577 */
7578 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007579
Daniel Veillard0161e632008-08-28 15:36:32 +00007580 /*
7581 * Ask first SAX for entity resolution, otherwise try the
7582 * entities which may have stored in the parser context.
7583 */
7584 if (ctxt->sax != NULL) {
7585 if (ctxt->sax->getEntity != NULL)
7586 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007587 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007588 (ctxt->options & XML_PARSE_OLDSAX))
7589 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007590 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7591 (ctxt->userData==ctxt)) {
7592 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007593 }
7594 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007595 if (ctxt->instate == XML_PARSER_EOF)
7596 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007597 /*
7598 * [ WFC: Entity Declared ]
7599 * In a document without any DTD, a document with only an
7600 * internal DTD subset which contains no parameter entity
7601 * references, or a document with "standalone='yes'", the
7602 * Name given in the entity reference must match that in an
7603 * entity declaration, except that well-formed documents
7604 * need not declare any of the following entities: amp, lt,
7605 * gt, apos, quot.
7606 * The declaration of a parameter entity must precede any
7607 * reference to it.
7608 * Similarly, the declaration of a general entity must
7609 * precede any reference to it which appears in a default
7610 * value in an attribute-list declaration. Note that if
7611 * entities are declared in the external subset or in
7612 * external parameter entities, a non-validating processor
7613 * is not obligated to read and process their declarations;
7614 * for such documents, the rule that an entity must be
7615 * declared is a well-formedness constraint only if
7616 * standalone='yes'.
7617 */
7618 if (ent == NULL) {
7619 if ((ctxt->standalone == 1) ||
7620 ((ctxt->hasExternalSubset == 0) &&
7621 (ctxt->hasPErefs == 0))) {
7622 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7623 "Entity '%s' not defined\n", name);
7624 } else {
7625 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7626 "Entity '%s' not defined\n", name);
7627 if ((ctxt->inSubset == 0) &&
7628 (ctxt->sax != NULL) &&
7629 (ctxt->sax->reference != NULL)) {
7630 ctxt->sax->reference(ctxt->userData, name);
7631 }
7632 }
7633 ctxt->valid = 0;
7634 }
7635
7636 /*
7637 * [ WFC: Parsed Entity ]
7638 * An entity reference must not contain the name of an
7639 * unparsed entity
7640 */
7641 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7642 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7643 "Entity reference to unparsed entity %s\n", name);
7644 }
7645
7646 /*
7647 * [ WFC: No External Entity References ]
7648 * Attribute values cannot contain direct or indirect
7649 * entity references to external entities.
7650 */
7651 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7652 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7653 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7654 "Attribute references external entity '%s'\n", name);
7655 }
7656 /*
7657 * [ WFC: No < in Attribute Values ]
7658 * The replacement text of any entity referred to directly or
7659 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007660 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007661 */
7662 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007663 (ent != NULL) &&
7664 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7665 if ((ent->checked & 1) || ((ent->checked == 0) &&
7666 (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7667 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7668 "'<' in entity '%s' is not allowed in attributes values\n", name);
7669 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007670 }
7671
7672 /*
7673 * Internal check, no parameter entities here ...
7674 */
7675 else {
7676 switch (ent->etype) {
7677 case XML_INTERNAL_PARAMETER_ENTITY:
7678 case XML_EXTERNAL_PARAMETER_ENTITY:
7679 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7680 "Attempt to reference the parameter entity '%s'\n",
7681 name);
7682 break;
7683 default:
7684 break;
7685 }
7686 }
7687
7688 /*
7689 * [ WFC: No Recursion ]
7690 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007691 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007692 * Done somewhere else
7693 */
Owen Taylor3473f882001-02-23 17:55:21 +00007694 return(ent);
7695}
7696
7697/**
7698 * xmlParseStringEntityRef:
7699 * @ctxt: an XML parser context
7700 * @str: a pointer to an index in the string
7701 *
7702 * parse ENTITY references declarations, but this version parses it from
7703 * a string value.
7704 *
7705 * [68] EntityRef ::= '&' Name ';'
7706 *
7707 * [ WFC: Entity Declared ]
7708 * In a document without any DTD, a document with only an internal DTD
7709 * subset which contains no parameter entity references, or a document
7710 * with "standalone='yes'", the Name given in the entity reference
7711 * must match that in an entity declaration, except that well-formed
7712 * documents need not declare any of the following entities: amp, lt,
7713 * gt, apos, quot. The declaration of a parameter entity must precede
7714 * any reference to it. Similarly, the declaration of a general entity
7715 * must precede any reference to it which appears in a default value in an
7716 * attribute-list declaration. Note that if entities are declared in the
7717 * external subset or in external parameter entities, a non-validating
7718 * processor is not obligated to read and process their declarations;
7719 * for such documents, the rule that an entity must be declared is a
7720 * well-formedness constraint only if standalone='yes'.
7721 *
7722 * [ WFC: Parsed Entity ]
7723 * An entity reference must not contain the name of an unparsed entity
7724 *
7725 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7726 * is updated to the current location in the string.
7727 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007728static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007729xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7730 xmlChar *name;
7731 const xmlChar *ptr;
7732 xmlChar cur;
7733 xmlEntityPtr ent = NULL;
7734
7735 if ((str == NULL) || (*str == NULL))
7736 return(NULL);
7737 ptr = *str;
7738 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007739 if (cur != '&')
7740 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007741
Daniel Veillard0161e632008-08-28 15:36:32 +00007742 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007743 name = xmlParseStringName(ctxt, &ptr);
7744 if (name == NULL) {
7745 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7746 "xmlParseStringEntityRef: no name\n");
7747 *str = ptr;
7748 return(NULL);
7749 }
7750 if (*ptr != ';') {
7751 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007752 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007753 *str = ptr;
7754 return(NULL);
7755 }
7756 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007757
Owen Taylor3473f882001-02-23 17:55:21 +00007758
Daniel Veillard0161e632008-08-28 15:36:32 +00007759 /*
7760 * Predefined entites override any extra definition
7761 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007762 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7763 ent = xmlGetPredefinedEntity(name);
7764 if (ent != NULL) {
7765 xmlFree(name);
7766 *str = ptr;
7767 return(ent);
7768 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007769 }
Owen Taylor3473f882001-02-23 17:55:21 +00007770
Daniel Veillard0161e632008-08-28 15:36:32 +00007771 /*
7772 * Increate the number of entity references parsed
7773 */
7774 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007775
Daniel Veillard0161e632008-08-28 15:36:32 +00007776 /*
7777 * Ask first SAX for entity resolution, otherwise try the
7778 * entities which may have stored in the parser context.
7779 */
7780 if (ctxt->sax != NULL) {
7781 if (ctxt->sax->getEntity != NULL)
7782 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007783 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7784 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007785 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7786 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007787 }
7788 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007789 if (ctxt->instate == XML_PARSER_EOF) {
7790 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007791 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007792 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007793
7794 /*
7795 * [ WFC: Entity Declared ]
7796 * In a document without any DTD, a document with only an
7797 * internal DTD subset which contains no parameter entity
7798 * references, or a document with "standalone='yes'", the
7799 * Name given in the entity reference must match that in an
7800 * entity declaration, except that well-formed documents
7801 * need not declare any of the following entities: amp, lt,
7802 * gt, apos, quot.
7803 * The declaration of a parameter entity must precede any
7804 * reference to it.
7805 * Similarly, the declaration of a general entity must
7806 * precede any reference to it which appears in a default
7807 * value in an attribute-list declaration. Note that if
7808 * entities are declared in the external subset or in
7809 * external parameter entities, a non-validating processor
7810 * is not obligated to read and process their declarations;
7811 * for such documents, the rule that an entity must be
7812 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007813 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007814 */
7815 if (ent == NULL) {
7816 if ((ctxt->standalone == 1) ||
7817 ((ctxt->hasExternalSubset == 0) &&
7818 (ctxt->hasPErefs == 0))) {
7819 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7820 "Entity '%s' not defined\n", name);
7821 } else {
7822 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7823 "Entity '%s' not defined\n",
7824 name);
7825 }
7826 /* TODO ? check regressions ctxt->valid = 0; */
7827 }
7828
7829 /*
7830 * [ WFC: Parsed Entity ]
7831 * An entity reference must not contain the name of an
7832 * unparsed entity
7833 */
7834 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7835 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7836 "Entity reference to unparsed entity %s\n", name);
7837 }
7838
7839 /*
7840 * [ WFC: No External Entity References ]
7841 * Attribute values cannot contain direct or indirect
7842 * entity references to external entities.
7843 */
7844 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7845 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7846 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7847 "Attribute references external entity '%s'\n", name);
7848 }
7849 /*
7850 * [ WFC: No < in Attribute Values ]
7851 * The replacement text of any entity referred to directly or
7852 * indirectly in an attribute value (other than "&lt;") must
7853 * not contain a <.
7854 */
7855 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7856 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007857 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007858 (xmlStrchr(ent->content, '<'))) {
7859 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7860 "'<' in entity '%s' is not allowed in attributes values\n",
7861 name);
7862 }
7863
7864 /*
7865 * Internal check, no parameter entities here ...
7866 */
7867 else {
7868 switch (ent->etype) {
7869 case XML_INTERNAL_PARAMETER_ENTITY:
7870 case XML_EXTERNAL_PARAMETER_ENTITY:
7871 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7872 "Attempt to reference the parameter entity '%s'\n",
7873 name);
7874 break;
7875 default:
7876 break;
7877 }
7878 }
7879
7880 /*
7881 * [ WFC: No Recursion ]
7882 * A parsed entity must not contain a recursive reference
7883 * to itself, either directly or indirectly.
7884 * Done somewhere else
7885 */
7886
7887 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007888 *str = ptr;
7889 return(ent);
7890}
7891
7892/**
7893 * xmlParsePEReference:
7894 * @ctxt: an XML parser context
7895 *
7896 * parse PEReference declarations
7897 * The entity content is handled directly by pushing it's content as
7898 * a new input stream.
7899 *
7900 * [69] PEReference ::= '%' Name ';'
7901 *
7902 * [ WFC: No Recursion ]
7903 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007904 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007905 *
7906 * [ WFC: Entity Declared ]
7907 * In a document without any DTD, a document with only an internal DTD
7908 * subset which contains no parameter entity references, or a document
7909 * with "standalone='yes'", ... ... The declaration of a parameter
7910 * entity must precede any reference to it...
7911 *
7912 * [ VC: Entity Declared ]
7913 * In a document with an external subset or external parameter entities
7914 * with "standalone='no'", ... ... The declaration of a parameter entity
7915 * must precede any reference to it...
7916 *
7917 * [ WFC: In DTD ]
7918 * Parameter-entity references may only appear in the DTD.
7919 * NOTE: misleading but this is handled.
7920 */
7921void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007922xmlParsePEReference(xmlParserCtxtPtr ctxt)
7923{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007924 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007925 xmlEntityPtr entity = NULL;
7926 xmlParserInputPtr input;
7927
Daniel Veillard0161e632008-08-28 15:36:32 +00007928 if (RAW != '%')
7929 return;
7930 NEXT;
7931 name = xmlParseName(ctxt);
7932 if (name == NULL) {
7933 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7934 "xmlParsePEReference: no name\n");
7935 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007936 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007937 if (RAW != ';') {
7938 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7939 return;
7940 }
7941
7942 NEXT;
7943
7944 /*
7945 * Increate the number of entity references parsed
7946 */
7947 ctxt->nbentities++;
7948
7949 /*
7950 * Request the entity from SAX
7951 */
7952 if ((ctxt->sax != NULL) &&
7953 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007954 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7955 if (ctxt->instate == XML_PARSER_EOF)
7956 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007957 if (entity == NULL) {
7958 /*
7959 * [ WFC: Entity Declared ]
7960 * In a document without any DTD, a document with only an
7961 * internal DTD subset which contains no parameter entity
7962 * references, or a document with "standalone='yes'", ...
7963 * ... The declaration of a parameter entity must precede
7964 * any reference to it...
7965 */
7966 if ((ctxt->standalone == 1) ||
7967 ((ctxt->hasExternalSubset == 0) &&
7968 (ctxt->hasPErefs == 0))) {
7969 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7970 "PEReference: %%%s; not found\n",
7971 name);
7972 } else {
7973 /*
7974 * [ VC: Entity Declared ]
7975 * In a document with an external subset or external
7976 * parameter entities with "standalone='no'", ...
7977 * ... The declaration of a parameter entity must
7978 * precede any reference to it...
7979 */
7980 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7981 "PEReference: %%%s; not found\n",
7982 name, NULL);
7983 ctxt->valid = 0;
7984 }
7985 } else {
7986 /*
7987 * Internal checking in case the entity quest barfed
7988 */
7989 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7990 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7991 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7992 "Internal: %%%s; is not a parameter entity\n",
7993 name, NULL);
7994 } else if (ctxt->input->free != deallocblankswrapper) {
7995 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7996 if (xmlPushInput(ctxt, input) < 0)
7997 return;
7998 } else {
7999 /*
8000 * TODO !!!
8001 * handle the extra spaces added before and after
8002 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8003 */
8004 input = xmlNewEntityInputStream(ctxt, entity);
8005 if (xmlPushInput(ctxt, input) < 0)
8006 return;
8007 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8008 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8009 (IS_BLANK_CH(NXT(5)))) {
8010 xmlParseTextDecl(ctxt);
8011 if (ctxt->errNo ==
8012 XML_ERR_UNSUPPORTED_ENCODING) {
8013 /*
8014 * The XML REC instructs us to stop parsing
8015 * right here
8016 */
8017 ctxt->instate = XML_PARSER_EOF;
8018 return;
8019 }
8020 }
8021 }
8022 }
8023 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008024}
8025
8026/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008027 * xmlLoadEntityContent:
8028 * @ctxt: an XML parser context
8029 * @entity: an unloaded system entity
8030 *
8031 * Load the original content of the given system entity from the
8032 * ExternalID/SystemID given. This is to be used for Included in Literal
8033 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8034 *
8035 * Returns 0 in case of success and -1 in case of failure
8036 */
8037static int
8038xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8039 xmlParserInputPtr input;
8040 xmlBufferPtr buf;
8041 int l, c;
8042 int count = 0;
8043
8044 if ((ctxt == NULL) || (entity == NULL) ||
8045 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8046 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8047 (entity->content != NULL)) {
8048 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8049 "xmlLoadEntityContent parameter error");
8050 return(-1);
8051 }
8052
8053 if (xmlParserDebugEntities)
8054 xmlGenericError(xmlGenericErrorContext,
8055 "Reading %s entity content input\n", entity->name);
8056
8057 buf = xmlBufferCreate();
8058 if (buf == NULL) {
8059 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8060 "xmlLoadEntityContent parameter error");
8061 return(-1);
8062 }
8063
8064 input = xmlNewEntityInputStream(ctxt, entity);
8065 if (input == NULL) {
8066 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8067 "xmlLoadEntityContent input error");
8068 xmlBufferFree(buf);
8069 return(-1);
8070 }
8071
8072 /*
8073 * Push the entity as the current input, read char by char
8074 * saving to the buffer until the end of the entity or an error
8075 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008076 if (xmlPushInput(ctxt, input) < 0) {
8077 xmlBufferFree(buf);
8078 return(-1);
8079 }
8080
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008081 GROW;
8082 c = CUR_CHAR(l);
8083 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8084 (IS_CHAR(c))) {
8085 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008086 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008087 count = 0;
8088 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008089 if (ctxt->instate == XML_PARSER_EOF) {
8090 xmlBufferFree(buf);
8091 return(-1);
8092 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008093 }
8094 NEXTL(l);
8095 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008096 if (c == 0) {
8097 count = 0;
8098 GROW;
8099 if (ctxt->instate == XML_PARSER_EOF) {
8100 xmlBufferFree(buf);
8101 return(-1);
8102 }
8103 c = CUR_CHAR(l);
8104 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008105 }
8106
8107 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8108 xmlPopInput(ctxt);
8109 } else if (!IS_CHAR(c)) {
8110 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8111 "xmlLoadEntityContent: invalid char value %d\n",
8112 c);
8113 xmlBufferFree(buf);
8114 return(-1);
8115 }
8116 entity->content = buf->content;
8117 buf->content = NULL;
8118 xmlBufferFree(buf);
8119
8120 return(0);
8121}
8122
8123/**
Owen Taylor3473f882001-02-23 17:55:21 +00008124 * xmlParseStringPEReference:
8125 * @ctxt: an XML parser context
8126 * @str: a pointer to an index in the string
8127 *
8128 * parse PEReference declarations
8129 *
8130 * [69] PEReference ::= '%' Name ';'
8131 *
8132 * [ WFC: No Recursion ]
8133 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008134 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008135 *
8136 * [ WFC: Entity Declared ]
8137 * In a document without any DTD, a document with only an internal DTD
8138 * subset which contains no parameter entity references, or a document
8139 * with "standalone='yes'", ... ... The declaration of a parameter
8140 * entity must precede any reference to it...
8141 *
8142 * [ VC: Entity Declared ]
8143 * In a document with an external subset or external parameter entities
8144 * with "standalone='no'", ... ... The declaration of a parameter entity
8145 * must precede any reference to it...
8146 *
8147 * [ WFC: In DTD ]
8148 * Parameter-entity references may only appear in the DTD.
8149 * NOTE: misleading but this is handled.
8150 *
8151 * Returns the string of the entity content.
8152 * str is updated to the current value of the index
8153 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008154static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008155xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8156 const xmlChar *ptr;
8157 xmlChar cur;
8158 xmlChar *name;
8159 xmlEntityPtr entity = NULL;
8160
8161 if ((str == NULL) || (*str == NULL)) return(NULL);
8162 ptr = *str;
8163 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008164 if (cur != '%')
8165 return(NULL);
8166 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008167 name = xmlParseStringName(ctxt, &ptr);
8168 if (name == NULL) {
8169 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8170 "xmlParseStringPEReference: no name\n");
8171 *str = ptr;
8172 return(NULL);
8173 }
8174 cur = *ptr;
8175 if (cur != ';') {
8176 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8177 xmlFree(name);
8178 *str = ptr;
8179 return(NULL);
8180 }
8181 ptr++;
8182
8183 /*
8184 * Increate the number of entity references parsed
8185 */
8186 ctxt->nbentities++;
8187
8188 /*
8189 * Request the entity from SAX
8190 */
8191 if ((ctxt->sax != NULL) &&
8192 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008193 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8194 if (ctxt->instate == XML_PARSER_EOF) {
8195 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008196 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008197 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008198 if (entity == NULL) {
8199 /*
8200 * [ WFC: Entity Declared ]
8201 * In a document without any DTD, a document with only an
8202 * internal DTD subset which contains no parameter entity
8203 * references, or a document with "standalone='yes'", ...
8204 * ... The declaration of a parameter entity must precede
8205 * any reference to it...
8206 */
8207 if ((ctxt->standalone == 1) ||
8208 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8209 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8210 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008211 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008212 /*
8213 * [ VC: Entity Declared ]
8214 * In a document with an external subset or external
8215 * parameter entities with "standalone='no'", ...
8216 * ... The declaration of a parameter entity must
8217 * precede any reference to it...
8218 */
8219 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8220 "PEReference: %%%s; not found\n",
8221 name, NULL);
8222 ctxt->valid = 0;
8223 }
8224 } else {
8225 /*
8226 * Internal checking in case the entity quest barfed
8227 */
8228 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8229 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8230 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8231 "%%%s; is not a parameter entity\n",
8232 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008233 }
8234 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008235 ctxt->hasPErefs = 1;
8236 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008237 *str = ptr;
8238 return(entity);
8239}
8240
8241/**
8242 * xmlParseDocTypeDecl:
8243 * @ctxt: an XML parser context
8244 *
8245 * parse a DOCTYPE declaration
8246 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008247 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008248 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8249 *
8250 * [ VC: Root Element Type ]
8251 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008252 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008253 */
8254
8255void
8256xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008257 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008258 xmlChar *ExternalID = NULL;
8259 xmlChar *URI = NULL;
8260
8261 /*
8262 * We know that '<!DOCTYPE' has been detected.
8263 */
8264 SKIP(9);
8265
8266 SKIP_BLANKS;
8267
8268 /*
8269 * Parse the DOCTYPE name.
8270 */
8271 name = xmlParseName(ctxt);
8272 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008273 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8274 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008275 }
8276 ctxt->intSubName = name;
8277
8278 SKIP_BLANKS;
8279
8280 /*
8281 * Check for SystemID and ExternalID
8282 */
8283 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8284
8285 if ((URI != NULL) || (ExternalID != NULL)) {
8286 ctxt->hasExternalSubset = 1;
8287 }
8288 ctxt->extSubURI = URI;
8289 ctxt->extSubSystem = ExternalID;
8290
8291 SKIP_BLANKS;
8292
8293 /*
8294 * Create and update the internal subset.
8295 */
8296 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8297 (!ctxt->disableSAX))
8298 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008299 if (ctxt->instate == XML_PARSER_EOF)
8300 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008301
8302 /*
8303 * Is there any internal subset declarations ?
8304 * they are handled separately in xmlParseInternalSubset()
8305 */
8306 if (RAW == '[')
8307 return;
8308
8309 /*
8310 * We should be at the end of the DOCTYPE declaration.
8311 */
8312 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008313 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008314 }
8315 NEXT;
8316}
8317
8318/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008319 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008320 * @ctxt: an XML parser context
8321 *
8322 * parse the internal subset declaration
8323 *
8324 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8325 */
8326
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008327static void
Owen Taylor3473f882001-02-23 17:55:21 +00008328xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8329 /*
8330 * Is there any DTD definition ?
8331 */
8332 if (RAW == '[') {
8333 ctxt->instate = XML_PARSER_DTD;
8334 NEXT;
8335 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008336 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008337 * PEReferences.
8338 * Subsequence (markupdecl | PEReference | S)*
8339 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008340 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008341 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008342 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008343
8344 SKIP_BLANKS;
8345 xmlParseMarkupDecl(ctxt);
8346 xmlParsePEReference(ctxt);
8347
8348 /*
8349 * Pop-up of finished entities.
8350 */
8351 while ((RAW == 0) && (ctxt->inputNr > 1))
8352 xmlPopInput(ctxt);
8353
8354 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008355 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008356 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008357 break;
8358 }
8359 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008360 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008361 NEXT;
8362 SKIP_BLANKS;
8363 }
8364 }
8365
8366 /*
8367 * We should be at the end of the DOCTYPE declaration.
8368 */
8369 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008370 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008371 }
8372 NEXT;
8373}
8374
Daniel Veillard81273902003-09-30 00:43:48 +00008375#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008376/**
8377 * xmlParseAttribute:
8378 * @ctxt: an XML parser context
8379 * @value: a xmlChar ** used to store the value of the attribute
8380 *
8381 * parse an attribute
8382 *
8383 * [41] Attribute ::= Name Eq AttValue
8384 *
8385 * [ WFC: No External Entity References ]
8386 * Attribute values cannot contain direct or indirect entity references
8387 * to external entities.
8388 *
8389 * [ WFC: No < in Attribute Values ]
8390 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008391 * an attribute value (other than "&lt;") must not contain a <.
8392 *
Owen Taylor3473f882001-02-23 17:55:21 +00008393 * [ VC: Attribute Value Type ]
8394 * The attribute must have been declared; the value must be of the type
8395 * declared for it.
8396 *
8397 * [25] Eq ::= S? '=' S?
8398 *
8399 * With namespace:
8400 *
8401 * [NS 11] Attribute ::= QName Eq AttValue
8402 *
8403 * Also the case QName == xmlns:??? is handled independently as a namespace
8404 * definition.
8405 *
8406 * Returns the attribute name, and the value in *value.
8407 */
8408
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008409const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008410xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008411 const xmlChar *name;
8412 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008413
8414 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008415 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008416 name = xmlParseName(ctxt);
8417 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008418 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008419 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008420 return(NULL);
8421 }
8422
8423 /*
8424 * read the value
8425 */
8426 SKIP_BLANKS;
8427 if (RAW == '=') {
8428 NEXT;
8429 SKIP_BLANKS;
8430 val = xmlParseAttValue(ctxt);
8431 ctxt->instate = XML_PARSER_CONTENT;
8432 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008433 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008434 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008435 return(NULL);
8436 }
8437
8438 /*
8439 * Check that xml:lang conforms to the specification
8440 * No more registered as an error, just generate a warning now
8441 * since this was deprecated in XML second edition
8442 */
8443 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8444 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008445 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8446 "Malformed value for xml:lang : %s\n",
8447 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008448 }
8449 }
8450
8451 /*
8452 * Check that xml:space conforms to the specification
8453 */
8454 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8455 if (xmlStrEqual(val, BAD_CAST "default"))
8456 *(ctxt->space) = 0;
8457 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8458 *(ctxt->space) = 1;
8459 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008460 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008461"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008462 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008463 }
8464 }
8465
8466 *value = val;
8467 return(name);
8468}
8469
8470/**
8471 * xmlParseStartTag:
8472 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008473 *
Owen Taylor3473f882001-02-23 17:55:21 +00008474 * parse a start of tag either for rule element or
8475 * EmptyElement. In both case we don't parse the tag closing chars.
8476 *
8477 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8478 *
8479 * [ WFC: Unique Att Spec ]
8480 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008481 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008482 *
8483 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8484 *
8485 * [ WFC: Unique Att Spec ]
8486 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008487 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008488 *
8489 * With namespace:
8490 *
8491 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8492 *
8493 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8494 *
8495 * Returns the element name parsed
8496 */
8497
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008498const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008499xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008500 const xmlChar *name;
8501 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008502 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008503 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008504 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008505 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008506 int i;
8507
8508 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008509 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008510
8511 name = xmlParseName(ctxt);
8512 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008513 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008514 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008515 return(NULL);
8516 }
8517
8518 /*
8519 * Now parse the attributes, it ends up with the ending
8520 *
8521 * (S Attribute)* S?
8522 */
8523 SKIP_BLANKS;
8524 GROW;
8525
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008526 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008527 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008528 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008529 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008530 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008531
8532 attname = xmlParseAttribute(ctxt, &attvalue);
8533 if ((attname != NULL) && (attvalue != NULL)) {
8534 /*
8535 * [ WFC: Unique Att Spec ]
8536 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008537 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008538 */
8539 for (i = 0; i < nbatts;i += 2) {
8540 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008541 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008542 xmlFree(attvalue);
8543 goto failed;
8544 }
8545 }
Owen Taylor3473f882001-02-23 17:55:21 +00008546 /*
8547 * Add the pair to atts
8548 */
8549 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008550 maxatts = 22; /* allow for 10 attrs by default */
8551 atts = (const xmlChar **)
8552 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008553 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008554 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008555 if (attvalue != NULL)
8556 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008557 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008558 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008559 ctxt->atts = atts;
8560 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008561 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008562 const xmlChar **n;
8563
Owen Taylor3473f882001-02-23 17:55:21 +00008564 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008565 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008566 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008567 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008568 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008569 if (attvalue != NULL)
8570 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008571 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008572 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008573 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008574 ctxt->atts = atts;
8575 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008576 }
8577 atts[nbatts++] = attname;
8578 atts[nbatts++] = attvalue;
8579 atts[nbatts] = NULL;
8580 atts[nbatts + 1] = NULL;
8581 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008582 if (attvalue != NULL)
8583 xmlFree(attvalue);
8584 }
8585
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008586failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008587
Daniel Veillard3772de32002-12-17 10:31:45 +00008588 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008589 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8590 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008591 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008592 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8593 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008594 }
8595 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008596 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8597 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008598 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8599 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008600 break;
8601 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008602 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008603 GROW;
8604 }
8605
8606 /*
8607 * SAX: Start of Element !
8608 */
8609 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008610 (!ctxt->disableSAX)) {
8611 if (nbatts > 0)
8612 ctxt->sax->startElement(ctxt->userData, name, atts);
8613 else
8614 ctxt->sax->startElement(ctxt->userData, name, NULL);
8615 }
Owen Taylor3473f882001-02-23 17:55:21 +00008616
8617 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008618 /* Free only the content strings */
8619 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008620 if (atts[i] != NULL)
8621 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008622 }
8623 return(name);
8624}
8625
8626/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008627 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008628 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008629 * @line: line of the start tag
8630 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008631 *
8632 * parse an end of tag
8633 *
8634 * [42] ETag ::= '</' Name S? '>'
8635 *
8636 * With namespace
8637 *
8638 * [NS 9] ETag ::= '</' QName S? '>'
8639 */
8640
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008641static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008642xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008643 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008644
8645 GROW;
8646 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008647 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008648 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008649 return;
8650 }
8651 SKIP(2);
8652
Daniel Veillard46de64e2002-05-29 08:21:33 +00008653 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008654
8655 /*
8656 * We should definitely be at the ending "S? '>'" part
8657 */
8658 GROW;
8659 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008660 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008661 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008662 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008663 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008664
8665 /*
8666 * [ WFC: Element Type Match ]
8667 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008668 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008669 *
8670 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008671 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008672 if (name == NULL) name = BAD_CAST "unparseable";
8673 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008674 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008675 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008676 }
8677
8678 /*
8679 * SAX: End of Tag
8680 */
8681 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8682 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008683 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008684
Daniel Veillarde57ec792003-09-10 10:50:59 +00008685 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008686 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008687 return;
8688}
8689
8690/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008691 * xmlParseEndTag:
8692 * @ctxt: an XML parser context
8693 *
8694 * parse an end of tag
8695 *
8696 * [42] ETag ::= '</' Name S? '>'
8697 *
8698 * With namespace
8699 *
8700 * [NS 9] ETag ::= '</' QName S? '>'
8701 */
8702
8703void
8704xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008705 xmlParseEndTag1(ctxt, 0);
8706}
Daniel Veillard81273902003-09-30 00:43:48 +00008707#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008708
8709/************************************************************************
8710 * *
8711 * SAX 2 specific operations *
8712 * *
8713 ************************************************************************/
8714
Daniel Veillard0fb18932003-09-07 09:14:37 +00008715/*
8716 * xmlGetNamespace:
8717 * @ctxt: an XML parser context
8718 * @prefix: the prefix to lookup
8719 *
8720 * Lookup the namespace name for the @prefix (which ca be NULL)
8721 * The prefix must come from the @ctxt->dict dictionnary
8722 *
8723 * Returns the namespace name or NULL if not bound
8724 */
8725static const xmlChar *
8726xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8727 int i;
8728
Daniel Veillarde57ec792003-09-10 10:50:59 +00008729 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008731 if (ctxt->nsTab[i] == prefix) {
8732 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8733 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008734 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008735 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736 return(NULL);
8737}
8738
8739/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008740 * xmlParseQName:
8741 * @ctxt: an XML parser context
8742 * @prefix: pointer to store the prefix part
8743 *
8744 * parse an XML Namespace QName
8745 *
8746 * [6] QName ::= (Prefix ':')? LocalPart
8747 * [7] Prefix ::= NCName
8748 * [8] LocalPart ::= NCName
8749 *
8750 * Returns the Name parsed or NULL
8751 */
8752
8753static const xmlChar *
8754xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8755 const xmlChar *l, *p;
8756
8757 GROW;
8758
8759 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008760 if (l == NULL) {
8761 if (CUR == ':') {
8762 l = xmlParseName(ctxt);
8763 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008764 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008765 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008766 *prefix = NULL;
8767 return(l);
8768 }
8769 }
8770 return(NULL);
8771 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008772 if (CUR == ':') {
8773 NEXT;
8774 p = l;
8775 l = xmlParseNCName(ctxt);
8776 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008777 xmlChar *tmp;
8778
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008779 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8780 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008781 l = xmlParseNmtoken(ctxt);
8782 if (l == NULL)
8783 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8784 else {
8785 tmp = xmlBuildQName(l, p, NULL, 0);
8786 xmlFree((char *)l);
8787 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008788 p = xmlDictLookup(ctxt->dict, tmp, -1);
8789 if (tmp != NULL) xmlFree(tmp);
8790 *prefix = NULL;
8791 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008792 }
8793 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008794 xmlChar *tmp;
8795
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008796 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8797 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008798 NEXT;
8799 tmp = (xmlChar *) xmlParseName(ctxt);
8800 if (tmp != NULL) {
8801 tmp = xmlBuildQName(tmp, l, NULL, 0);
8802 l = xmlDictLookup(ctxt->dict, tmp, -1);
8803 if (tmp != NULL) xmlFree(tmp);
8804 *prefix = p;
8805 return(l);
8806 }
8807 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8808 l = xmlDictLookup(ctxt->dict, tmp, -1);
8809 if (tmp != NULL) xmlFree(tmp);
8810 *prefix = p;
8811 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008812 }
8813 *prefix = p;
8814 } else
8815 *prefix = NULL;
8816 return(l);
8817}
8818
8819/**
8820 * xmlParseQNameAndCompare:
8821 * @ctxt: an XML parser context
8822 * @name: the localname
8823 * @prefix: the prefix, if any.
8824 *
8825 * parse an XML name and compares for match
8826 * (specialized for endtag parsing)
8827 *
8828 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8829 * and the name for mismatch
8830 */
8831
8832static const xmlChar *
8833xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8834 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008835 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008836 const xmlChar *in;
8837 const xmlChar *ret;
8838 const xmlChar *prefix2;
8839
8840 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8841
8842 GROW;
8843 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008844
Daniel Veillard0fb18932003-09-07 09:14:37 +00008845 cmp = prefix;
8846 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008847 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008848 ++cmp;
8849 }
8850 if ((*cmp == 0) && (*in == ':')) {
8851 in++;
8852 cmp = name;
8853 while (*in != 0 && *in == *cmp) {
8854 ++in;
8855 ++cmp;
8856 }
William M. Brack76e95df2003-10-18 16:20:14 +00008857 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008858 /* success */
8859 ctxt->input->cur = in;
8860 return((const xmlChar*) 1);
8861 }
8862 }
8863 /*
8864 * all strings coms from the dictionary, equality can be done directly
8865 */
8866 ret = xmlParseQName (ctxt, &prefix2);
8867 if ((ret == name) && (prefix == prefix2))
8868 return((const xmlChar*) 1);
8869 return ret;
8870}
8871
8872/**
8873 * xmlParseAttValueInternal:
8874 * @ctxt: an XML parser context
8875 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008876 * @alloc: whether the attribute was reallocated as a new string
8877 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008878 *
8879 * parse a value for an attribute.
8880 * NOTE: if no normalization is needed, the routine will return pointers
8881 * directly from the data buffer.
8882 *
8883 * 3.3.3 Attribute-Value Normalization:
8884 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008885 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008886 * - a character reference is processed by appending the referenced
8887 * character to the attribute value
8888 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008889 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008890 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8891 * appending #x20 to the normalized value, except that only a single
8892 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008893 * parsed entity or the literal entity value of an internal parsed entity
8894 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008895 * If the declared value is not CDATA, then the XML processor must further
8896 * process the normalized attribute value by discarding any leading and
8897 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008898 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008899 * All attributes for which no declaration has been read should be treated
8900 * by a non-validating parser as if declared CDATA.
8901 *
8902 * Returns the AttValue parsed or NULL. The value has to be freed by the
8903 * caller if it was copied, this can be detected by val[*len] == 0.
8904 */
8905
8906static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008907xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8908 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008909{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008910 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008911 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008912 xmlChar *ret = NULL;
8913
8914 GROW;
8915 in = (xmlChar *) CUR_PTR;
8916 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008917 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008918 return (NULL);
8919 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008920 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008921
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008922 /*
8923 * try to handle in this routine the most common case where no
8924 * allocation of a new string is required and where content is
8925 * pure ASCII.
8926 */
8927 limit = *in++;
8928 end = ctxt->input->end;
8929 start = in;
8930 if (in >= end) {
8931 const xmlChar *oldbase = ctxt->input->base;
8932 GROW;
8933 if (oldbase != ctxt->input->base) {
8934 long delta = ctxt->input->base - oldbase;
8935 start = start + delta;
8936 in = in + delta;
8937 }
8938 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008939 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008940 if (normalize) {
8941 /*
8942 * Skip any leading spaces
8943 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008944 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008945 ((*in == 0x20) || (*in == 0x9) ||
8946 (*in == 0xA) || (*in == 0xD))) {
8947 in++;
8948 start = in;
8949 if (in >= end) {
8950 const xmlChar *oldbase = ctxt->input->base;
8951 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008952 if (ctxt->instate == XML_PARSER_EOF)
8953 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008954 if (oldbase != ctxt->input->base) {
8955 long delta = ctxt->input->base - oldbase;
8956 start = start + delta;
8957 in = in + delta;
8958 }
8959 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008960 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8961 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8962 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008963 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008964 return(NULL);
8965 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008966 }
8967 }
8968 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8969 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8970 if ((*in++ == 0x20) && (*in == 0x20)) break;
8971 if (in >= end) {
8972 const xmlChar *oldbase = ctxt->input->base;
8973 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008974 if (ctxt->instate == XML_PARSER_EOF)
8975 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008976 if (oldbase != ctxt->input->base) {
8977 long delta = ctxt->input->base - oldbase;
8978 start = start + delta;
8979 in = in + delta;
8980 }
8981 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008982 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8983 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8984 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008985 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008986 return(NULL);
8987 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008988 }
8989 }
8990 last = in;
8991 /*
8992 * skip the trailing blanks
8993 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008994 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008995 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008996 ((*in == 0x20) || (*in == 0x9) ||
8997 (*in == 0xA) || (*in == 0xD))) {
8998 in++;
8999 if (in >= end) {
9000 const xmlChar *oldbase = ctxt->input->base;
9001 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009002 if (ctxt->instate == XML_PARSER_EOF)
9003 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009004 if (oldbase != ctxt->input->base) {
9005 long delta = ctxt->input->base - oldbase;
9006 start = start + delta;
9007 in = in + delta;
9008 last = last + delta;
9009 }
9010 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009011 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9012 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9013 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009014 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009015 return(NULL);
9016 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009017 }
9018 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009019 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9020 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9021 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009022 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009023 return(NULL);
9024 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009025 if (*in != limit) goto need_complex;
9026 } else {
9027 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9028 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9029 in++;
9030 if (in >= end) {
9031 const xmlChar *oldbase = ctxt->input->base;
9032 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009033 if (ctxt->instate == XML_PARSER_EOF)
9034 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009035 if (oldbase != ctxt->input->base) {
9036 long delta = ctxt->input->base - oldbase;
9037 start = start + delta;
9038 in = in + delta;
9039 }
9040 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009041 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9042 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9043 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009044 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009045 return(NULL);
9046 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009047 }
9048 }
9049 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009050 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9051 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9052 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009053 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009054 return(NULL);
9055 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009056 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009057 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009058 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009059 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009060 *len = last - start;
9061 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009062 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009063 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009064 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009065 }
9066 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009067 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009068 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009069need_complex:
9070 if (alloc) *alloc = 1;
9071 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009072}
9073
9074/**
9075 * xmlParseAttribute2:
9076 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009077 * @pref: the element prefix
9078 * @elem: the element name
9079 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009080 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009081 * @len: an int * to save the length of the attribute
9082 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009083 *
9084 * parse an attribute in the new SAX2 framework.
9085 *
9086 * Returns the attribute name, and the value in *value, .
9087 */
9088
9089static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009090xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009091 const xmlChar * pref, const xmlChar * elem,
9092 const xmlChar ** prefix, xmlChar ** value,
9093 int *len, int *alloc)
9094{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009096 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009097 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009098
9099 *value = NULL;
9100 GROW;
9101 name = xmlParseQName(ctxt, prefix);
9102 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009103 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9104 "error parsing attribute name\n");
9105 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009106 }
9107
9108 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009109 * get the type if needed
9110 */
9111 if (ctxt->attsSpecial != NULL) {
9112 int type;
9113
9114 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009115 pref, elem, *prefix, name);
9116 if (type != 0)
9117 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009118 }
9119
9120 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009121 * read the value
9122 */
9123 SKIP_BLANKS;
9124 if (RAW == '=') {
9125 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009126 SKIP_BLANKS;
9127 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9128 if (normalize) {
9129 /*
9130 * Sometimes a second normalisation pass for spaces is needed
9131 * but that only happens if charrefs or entities refernces
9132 * have been used in the attribute value, i.e. the attribute
9133 * value have been extracted in an allocated string already.
9134 */
9135 if (*alloc) {
9136 const xmlChar *val2;
9137
9138 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009139 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009140 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009141 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009142 }
9143 }
9144 }
9145 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009146 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009147 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9148 "Specification mandate value for attribute %s\n",
9149 name);
9150 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009151 }
9152
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009153 if (*prefix == ctxt->str_xml) {
9154 /*
9155 * Check that xml:lang conforms to the specification
9156 * No more registered as an error, just generate a warning now
9157 * since this was deprecated in XML second edition
9158 */
9159 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9160 internal_val = xmlStrndup(val, *len);
9161 if (!xmlCheckLanguageID(internal_val)) {
9162 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9163 "Malformed value for xml:lang : %s\n",
9164 internal_val, NULL);
9165 }
9166 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009167
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009168 /*
9169 * Check that xml:space conforms to the specification
9170 */
9171 if (xmlStrEqual(name, BAD_CAST "space")) {
9172 internal_val = xmlStrndup(val, *len);
9173 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9174 *(ctxt->space) = 0;
9175 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9176 *(ctxt->space) = 1;
9177 else {
9178 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9179 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9180 internal_val, NULL);
9181 }
9182 }
9183 if (internal_val) {
9184 xmlFree(internal_val);
9185 }
9186 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009187
9188 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009189 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009190}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009191/**
9192 * xmlParseStartTag2:
9193 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009194 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009195 * parse a start of tag either for rule element or
9196 * EmptyElement. In both case we don't parse the tag closing chars.
9197 * This routine is called when running SAX2 parsing
9198 *
9199 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9200 *
9201 * [ WFC: Unique Att Spec ]
9202 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009203 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009204 *
9205 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9206 *
9207 * [ WFC: Unique Att Spec ]
9208 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009209 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009210 *
9211 * With namespace:
9212 *
9213 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9214 *
9215 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9216 *
9217 * Returns the element name parsed
9218 */
9219
9220static const xmlChar *
9221xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009222 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009223 const xmlChar *localname;
9224 const xmlChar *prefix;
9225 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009226 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009227 const xmlChar *nsname;
9228 xmlChar *attvalue;
9229 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009230 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009231 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009232 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009233 const xmlChar *base;
9234 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009235 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009236
9237 if (RAW != '<') return(NULL);
9238 NEXT1;
9239
9240 /*
9241 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9242 * point since the attribute values may be stored as pointers to
9243 * the buffer and calling SHRINK would destroy them !
9244 * The Shrinking is only possible once the full set of attribute
9245 * callbacks have been done.
9246 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009247reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009248 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009249 base = ctxt->input->base;
9250 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009251 oldline = ctxt->input->line;
9252 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009253 nbatts = 0;
9254 nratts = 0;
9255 nbdef = 0;
9256 nbNs = 0;
9257 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009258 /* Forget any namespaces added during an earlier parse of this element. */
9259 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009260
9261 localname = xmlParseQName(ctxt, &prefix);
9262 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009263 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9264 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009265 return(NULL);
9266 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009267 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009268
9269 /*
9270 * Now parse the attributes, it ends up with the ending
9271 *
9272 * (S Attribute)* S?
9273 */
9274 SKIP_BLANKS;
9275 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009276 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009277
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009278 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009279 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009280 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009281 const xmlChar *q = CUR_PTR;
9282 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009283 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009284
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009285 attname = xmlParseAttribute2(ctxt, prefix, localname,
9286 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009287 if (ctxt->input->base != base) {
9288 if ((attvalue != NULL) && (alloc != 0))
9289 xmlFree(attvalue);
9290 attvalue = NULL;
9291 goto base_changed;
9292 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009293 if ((attname != NULL) && (attvalue != NULL)) {
9294 if (len < 0) len = xmlStrlen(attvalue);
9295 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009296 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9297 xmlURIPtr uri;
9298
9299 if (*URL != 0) {
9300 uri = xmlParseURI((const char *) URL);
9301 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009302 xmlNsErr(ctxt, XML_WAR_NS_URI,
9303 "xmlns: '%s' is not a valid URI\n",
9304 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009305 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009306 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009307 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9308 "xmlns: URI %s is not absolute\n",
9309 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009310 }
9311 xmlFreeURI(uri);
9312 }
Daniel Veillard37334572008-07-31 08:20:02 +00009313 if (URL == ctxt->str_xml_ns) {
9314 if (attname != ctxt->str_xml) {
9315 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9316 "xml namespace URI cannot be the default namespace\n",
9317 NULL, NULL, NULL);
9318 }
9319 goto skip_default_ns;
9320 }
9321 if ((len == 29) &&
9322 (xmlStrEqual(URL,
9323 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9324 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9325 "reuse of the xmlns namespace name is forbidden\n",
9326 NULL, NULL, NULL);
9327 goto skip_default_ns;
9328 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009329 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009330 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009331 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009332 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009333 for (j = 1;j <= nbNs;j++)
9334 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9335 break;
9336 if (j <= nbNs)
9337 xmlErrAttributeDup(ctxt, NULL, attname);
9338 else
9339 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009340skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009341 if (alloc != 0) xmlFree(attvalue);
9342 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009343 continue;
9344 }
9345 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009346 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9347 xmlURIPtr uri;
9348
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009349 if (attname == ctxt->str_xml) {
9350 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009351 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9352 "xml namespace prefix mapped to wrong URI\n",
9353 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009354 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009355 /*
9356 * Do not keep a namespace definition node
9357 */
Daniel Veillard37334572008-07-31 08:20:02 +00009358 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009359 }
Daniel Veillard37334572008-07-31 08:20:02 +00009360 if (URL == ctxt->str_xml_ns) {
9361 if (attname != ctxt->str_xml) {
9362 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9363 "xml namespace URI mapped to wrong prefix\n",
9364 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009365 }
Daniel Veillard37334572008-07-31 08:20:02 +00009366 goto skip_ns;
9367 }
9368 if (attname == ctxt->str_xmlns) {
9369 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9370 "redefinition of the xmlns prefix is forbidden\n",
9371 NULL, NULL, NULL);
9372 goto skip_ns;
9373 }
9374 if ((len == 29) &&
9375 (xmlStrEqual(URL,
9376 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9377 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9378 "reuse of the xmlns namespace name is forbidden\n",
9379 NULL, NULL, NULL);
9380 goto skip_ns;
9381 }
9382 if ((URL == NULL) || (URL[0] == 0)) {
9383 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9384 "xmlns:%s: Empty XML namespace is not allowed\n",
9385 attname, NULL, NULL);
9386 goto skip_ns;
9387 } else {
9388 uri = xmlParseURI((const char *) URL);
9389 if (uri == NULL) {
9390 xmlNsErr(ctxt, XML_WAR_NS_URI,
9391 "xmlns:%s: '%s' is not a valid URI\n",
9392 attname, URL, NULL);
9393 } else {
9394 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9395 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9396 "xmlns:%s: URI %s is not absolute\n",
9397 attname, URL, NULL);
9398 }
9399 xmlFreeURI(uri);
9400 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009401 }
9402
Daniel Veillard0fb18932003-09-07 09:14:37 +00009403 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009404 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009405 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009406 for (j = 1;j <= nbNs;j++)
9407 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9408 break;
9409 if (j <= nbNs)
9410 xmlErrAttributeDup(ctxt, aprefix, attname);
9411 else
9412 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009413skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009414 if (alloc != 0) xmlFree(attvalue);
9415 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009416 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009417 continue;
9418 }
9419
9420 /*
9421 * Add the pair to atts
9422 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009423 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9424 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009425 if (attvalue[len] == 0)
9426 xmlFree(attvalue);
9427 goto failed;
9428 }
9429 maxatts = ctxt->maxatts;
9430 atts = ctxt->atts;
9431 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009432 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009433 atts[nbatts++] = attname;
9434 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009435 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009436 atts[nbatts++] = attvalue;
9437 attvalue += len;
9438 atts[nbatts++] = attvalue;
9439 /*
9440 * tag if some deallocation is needed
9441 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009442 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009443 } else {
9444 if ((attvalue != NULL) && (attvalue[len] == 0))
9445 xmlFree(attvalue);
9446 }
9447
Daniel Veillard37334572008-07-31 08:20:02 +00009448failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009449
9450 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009451 if (ctxt->instate == XML_PARSER_EOF)
9452 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009453 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009454 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9455 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009456 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009457 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9458 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009459 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009460 }
9461 SKIP_BLANKS;
9462 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9463 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009464 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009465 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009466 break;
9467 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009468 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009469 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009470 }
9471
Daniel Veillard0fb18932003-09-07 09:14:37 +00009472 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009473 * The attributes defaulting
9474 */
9475 if (ctxt->attsDefault != NULL) {
9476 xmlDefAttrsPtr defaults;
9477
9478 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9479 if (defaults != NULL) {
9480 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009481 attname = defaults->values[5 * i];
9482 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009483
9484 /*
9485 * special work for namespaces defaulted defs
9486 */
9487 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9488 /*
9489 * check that it's not a defined namespace
9490 */
9491 for (j = 1;j <= nbNs;j++)
9492 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9493 break;
9494 if (j <= nbNs) continue;
9495
9496 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009497 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009498 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009499 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009500 nbNs++;
9501 }
9502 } else if (aprefix == ctxt->str_xmlns) {
9503 /*
9504 * check that it's not a defined namespace
9505 */
9506 for (j = 1;j <= nbNs;j++)
9507 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9508 break;
9509 if (j <= nbNs) continue;
9510
9511 nsname = xmlGetNamespace(ctxt, attname);
9512 if (nsname != defaults->values[2]) {
9513 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009514 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009515 nbNs++;
9516 }
9517 } else {
9518 /*
9519 * check that it's not a defined attribute
9520 */
9521 for (j = 0;j < nbatts;j+=5) {
9522 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9523 break;
9524 }
9525 if (j < nbatts) continue;
9526
9527 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9528 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009529 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009530 }
9531 maxatts = ctxt->maxatts;
9532 atts = ctxt->atts;
9533 }
9534 atts[nbatts++] = attname;
9535 atts[nbatts++] = aprefix;
9536 if (aprefix == NULL)
9537 atts[nbatts++] = NULL;
9538 else
9539 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009540 atts[nbatts++] = defaults->values[5 * i + 2];
9541 atts[nbatts++] = defaults->values[5 * i + 3];
9542 if ((ctxt->standalone == 1) &&
9543 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009544 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009545 "standalone: attribute %s on %s defaulted from external subset\n",
9546 attname, localname);
9547 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009548 nbdef++;
9549 }
9550 }
9551 }
9552 }
9553
Daniel Veillarde70c8772003-11-25 07:21:18 +00009554 /*
9555 * The attributes checkings
9556 */
9557 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009558 /*
9559 * The default namespace does not apply to attribute names.
9560 */
9561 if (atts[i + 1] != NULL) {
9562 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9563 if (nsname == NULL) {
9564 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9565 "Namespace prefix %s for %s on %s is not defined\n",
9566 atts[i + 1], atts[i], localname);
9567 }
9568 atts[i + 2] = nsname;
9569 } else
9570 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009571 /*
9572 * [ WFC: Unique Att Spec ]
9573 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009574 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009575 * As extended by the Namespace in XML REC.
9576 */
9577 for (j = 0; j < i;j += 5) {
9578 if (atts[i] == atts[j]) {
9579 if (atts[i+1] == atts[j+1]) {
9580 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9581 break;
9582 }
9583 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9584 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9585 "Namespaced Attribute %s in '%s' redefined\n",
9586 atts[i], nsname, NULL);
9587 break;
9588 }
9589 }
9590 }
9591 }
9592
Daniel Veillarde57ec792003-09-10 10:50:59 +00009593 nsname = xmlGetNamespace(ctxt, prefix);
9594 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009595 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9596 "Namespace prefix %s on %s is not defined\n",
9597 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009598 }
9599 *pref = prefix;
9600 *URI = nsname;
9601
9602 /*
9603 * SAX: Start of Element !
9604 */
9605 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9606 (!ctxt->disableSAX)) {
9607 if (nbNs > 0)
9608 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9609 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9610 nbatts / 5, nbdef, atts);
9611 else
9612 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9613 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9614 }
9615
9616 /*
9617 * Free up attribute allocated strings if needed
9618 */
9619 if (attval != 0) {
9620 for (i = 3,j = 0; j < nratts;i += 5,j++)
9621 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9622 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009623 }
9624
9625 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009626
9627base_changed:
9628 /*
9629 * the attribute strings are valid iif the base didn't changed
9630 */
9631 if (attval != 0) {
9632 for (i = 3,j = 0; j < nratts;i += 5,j++)
9633 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9634 xmlFree((xmlChar *) atts[i]);
9635 }
9636 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009637 ctxt->input->line = oldline;
9638 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009639 if (ctxt->wellFormed == 1) {
9640 goto reparse;
9641 }
9642 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009643}
9644
9645/**
9646 * xmlParseEndTag2:
9647 * @ctxt: an XML parser context
9648 * @line: line of the start tag
9649 * @nsNr: number of namespaces on the start tag
9650 *
9651 * parse an end of tag
9652 *
9653 * [42] ETag ::= '</' Name S? '>'
9654 *
9655 * With namespace
9656 *
9657 * [NS 9] ETag ::= '</' QName S? '>'
9658 */
9659
9660static void
9661xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009662 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009663 const xmlChar *name;
9664
9665 GROW;
9666 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009667 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009668 return;
9669 }
9670 SKIP(2);
9671
William M. Brack13dfa872004-09-18 04:52:08 +00009672 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009673 if (ctxt->input->cur[tlen] == '>') {
9674 ctxt->input->cur += tlen + 1;
9675 goto done;
9676 }
9677 ctxt->input->cur += tlen;
9678 name = (xmlChar*)1;
9679 } else {
9680 if (prefix == NULL)
9681 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9682 else
9683 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9684 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009685
9686 /*
9687 * We should definitely be at the ending "S? '>'" part
9688 */
9689 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009690 if (ctxt->instate == XML_PARSER_EOF)
9691 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009692 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009693 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009694 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009695 } else
9696 NEXT1;
9697
9698 /*
9699 * [ WFC: Element Type Match ]
9700 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009701 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009702 *
9703 */
9704 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009705 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009706 if ((line == 0) && (ctxt->node != NULL))
9707 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009708 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009709 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009710 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009711 }
9712
9713 /*
9714 * SAX: End of Tag
9715 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009716done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009717 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9718 (!ctxt->disableSAX))
9719 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9720
Daniel Veillard0fb18932003-09-07 09:14:37 +00009721 spacePop(ctxt);
9722 if (nsNr != 0)
9723 nsPop(ctxt, nsNr);
9724 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009725}
9726
9727/**
Owen Taylor3473f882001-02-23 17:55:21 +00009728 * xmlParseCDSect:
9729 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009730 *
Owen Taylor3473f882001-02-23 17:55:21 +00009731 * Parse escaped pure raw content.
9732 *
9733 * [18] CDSect ::= CDStart CData CDEnd
9734 *
9735 * [19] CDStart ::= '<![CDATA['
9736 *
9737 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9738 *
9739 * [21] CDEnd ::= ']]>'
9740 */
9741void
9742xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9743 xmlChar *buf = NULL;
9744 int len = 0;
9745 int size = XML_PARSER_BUFFER_SIZE;
9746 int r, rl;
9747 int s, sl;
9748 int cur, l;
9749 int count = 0;
9750
Daniel Veillard8f597c32003-10-06 08:19:27 +00009751 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009752 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009753 SKIP(9);
9754 } else
9755 return;
9756
9757 ctxt->instate = XML_PARSER_CDATA_SECTION;
9758 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009759 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009760 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009761 ctxt->instate = XML_PARSER_CONTENT;
9762 return;
9763 }
9764 NEXTL(rl);
9765 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009766 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009767 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009768 ctxt->instate = XML_PARSER_CONTENT;
9769 return;
9770 }
9771 NEXTL(sl);
9772 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009773 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009774 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009775 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009776 return;
9777 }
William M. Brack871611b2003-10-18 04:53:14 +00009778 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009779 ((r != ']') || (s != ']') || (cur != '>'))) {
9780 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009781 xmlChar *tmp;
9782
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009783 if ((size > XML_MAX_TEXT_LENGTH) &&
9784 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9785 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9786 "CData section too big found", NULL);
9787 xmlFree (buf);
9788 return;
9789 }
9790 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009791 if (tmp == NULL) {
9792 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009793 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009794 return;
9795 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009796 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009797 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009798 }
9799 COPY_BUF(rl,buf,len,r);
9800 r = s;
9801 rl = sl;
9802 s = cur;
9803 sl = l;
9804 count++;
9805 if (count > 50) {
9806 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009807 if (ctxt->instate == XML_PARSER_EOF) {
9808 xmlFree(buf);
9809 return;
9810 }
Owen Taylor3473f882001-02-23 17:55:21 +00009811 count = 0;
9812 }
9813 NEXTL(l);
9814 cur = CUR_CHAR(l);
9815 }
9816 buf[len] = 0;
9817 ctxt->instate = XML_PARSER_CONTENT;
9818 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009819 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009820 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009821 xmlFree(buf);
9822 return;
9823 }
9824 NEXTL(l);
9825
9826 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009827 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009828 */
9829 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9830 if (ctxt->sax->cdataBlock != NULL)
9831 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009832 else if (ctxt->sax->characters != NULL)
9833 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009834 }
9835 xmlFree(buf);
9836}
9837
9838/**
9839 * xmlParseContent:
9840 * @ctxt: an XML parser context
9841 *
9842 * Parse a content:
9843 *
9844 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9845 */
9846
9847void
9848xmlParseContent(xmlParserCtxtPtr ctxt) {
9849 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009850 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009851 ((RAW != '<') || (NXT(1) != '/')) &&
9852 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009853 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009854 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009855 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009856
9857 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009858 * First case : a Processing Instruction.
9859 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009860 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009861 xmlParsePI(ctxt);
9862 }
9863
9864 /*
9865 * Second case : a CDSection
9866 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009867 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009868 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009869 xmlParseCDSect(ctxt);
9870 }
9871
9872 /*
9873 * Third case : a comment
9874 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009875 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009876 (NXT(2) == '-') && (NXT(3) == '-')) {
9877 xmlParseComment(ctxt);
9878 ctxt->instate = XML_PARSER_CONTENT;
9879 }
9880
9881 /*
9882 * Fourth case : a sub-element.
9883 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009884 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009885 xmlParseElement(ctxt);
9886 }
9887
9888 /*
9889 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009890 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009891 */
9892
Daniel Veillard21a0f912001-02-25 19:54:14 +00009893 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009894 xmlParseReference(ctxt);
9895 }
9896
9897 /*
9898 * Last case, text. Note that References are handled directly.
9899 */
9900 else {
9901 xmlParseCharData(ctxt, 0);
9902 }
9903
9904 GROW;
9905 /*
9906 * Pop-up of finished entities.
9907 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009908 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009909 xmlPopInput(ctxt);
9910 SHRINK;
9911
Daniel Veillardfdc91562002-07-01 21:52:03 +00009912 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009913 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9914 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009915 ctxt->instate = XML_PARSER_EOF;
9916 break;
9917 }
9918 }
9919}
9920
9921/**
9922 * xmlParseElement:
9923 * @ctxt: an XML parser context
9924 *
9925 * parse an XML element, this is highly recursive
9926 *
9927 * [39] element ::= EmptyElemTag | STag content ETag
9928 *
9929 * [ WFC: Element Type Match ]
9930 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009931 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009932 *
Owen Taylor3473f882001-02-23 17:55:21 +00009933 */
9934
9935void
9936xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009937 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009938 const xmlChar *prefix = NULL;
9939 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009940 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009941 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009942 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009943 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009944
Daniel Veillard8915c152008-08-26 13:05:34 +00009945 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9946 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9947 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9948 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9949 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009950 ctxt->instate = XML_PARSER_EOF;
9951 return;
9952 }
9953
Owen Taylor3473f882001-02-23 17:55:21 +00009954 /* Capture start position */
9955 if (ctxt->record_info) {
9956 node_info.begin_pos = ctxt->input->consumed +
9957 (CUR_PTR - ctxt->input->base);
9958 node_info.begin_line = ctxt->input->line;
9959 }
9960
9961 if (ctxt->spaceNr == 0)
9962 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009963 else if (*ctxt->space == -2)
9964 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009965 else
9966 spacePush(ctxt, *ctxt->space);
9967
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009968 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009969#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009970 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009971#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009972 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009973#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009974 else
9975 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009976#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009977 if (ctxt->instate == XML_PARSER_EOF)
9978 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009979 if (name == NULL) {
9980 spacePop(ctxt);
9981 return;
9982 }
9983 namePush(ctxt, name);
9984 ret = ctxt->node;
9985
Daniel Veillard4432df22003-09-28 18:58:27 +00009986#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009987 /*
9988 * [ VC: Root Element Type ]
9989 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009990 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009991 */
9992 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9993 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9994 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009995#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009996
9997 /*
9998 * Check for an Empty Element.
9999 */
10000 if ((RAW == '/') && (NXT(1) == '>')) {
10001 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010002 if (ctxt->sax2) {
10003 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10004 (!ctxt->disableSAX))
10005 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010006#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010007 } else {
10008 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10009 (!ctxt->disableSAX))
10010 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010011#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010012 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010013 namePop(ctxt);
10014 spacePop(ctxt);
10015 if (nsNr != ctxt->nsNr)
10016 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010017 if ( ret != NULL && ctxt->record_info ) {
10018 node_info.end_pos = ctxt->input->consumed +
10019 (CUR_PTR - ctxt->input->base);
10020 node_info.end_line = ctxt->input->line;
10021 node_info.node = ret;
10022 xmlParserAddNodeInfo(ctxt, &node_info);
10023 }
10024 return;
10025 }
10026 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010027 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010028 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010029 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10030 "Couldn't find end of Start Tag %s line %d\n",
10031 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010032
10033 /*
10034 * end of parsing of this node.
10035 */
10036 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010037 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010038 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010039 if (nsNr != ctxt->nsNr)
10040 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010041
10042 /*
10043 * Capture end position and add node
10044 */
10045 if ( ret != NULL && ctxt->record_info ) {
10046 node_info.end_pos = ctxt->input->consumed +
10047 (CUR_PTR - ctxt->input->base);
10048 node_info.end_line = ctxt->input->line;
10049 node_info.node = ret;
10050 xmlParserAddNodeInfo(ctxt, &node_info);
10051 }
10052 return;
10053 }
10054
10055 /*
10056 * Parse the content of the element:
10057 */
10058 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010059 if (ctxt->instate == XML_PARSER_EOF)
10060 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010061 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010062 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010063 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010064 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010065
10066 /*
10067 * end of parsing of this node.
10068 */
10069 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010070 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010071 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010072 if (nsNr != ctxt->nsNr)
10073 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010074 return;
10075 }
10076
10077 /*
10078 * parse the end of tag: '</' should be here.
10079 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010080 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010081 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010082 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010083 }
10084#ifdef LIBXML_SAX1_ENABLED
10085 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010086 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010087#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010088
10089 /*
10090 * Capture end position and add node
10091 */
10092 if ( ret != NULL && ctxt->record_info ) {
10093 node_info.end_pos = ctxt->input->consumed +
10094 (CUR_PTR - ctxt->input->base);
10095 node_info.end_line = ctxt->input->line;
10096 node_info.node = ret;
10097 xmlParserAddNodeInfo(ctxt, &node_info);
10098 }
10099}
10100
10101/**
10102 * xmlParseVersionNum:
10103 * @ctxt: an XML parser context
10104 *
10105 * parse the XML version value.
10106 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010107 * [26] VersionNum ::= '1.' [0-9]+
10108 *
10109 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010110 *
10111 * Returns the string giving the XML version number, or NULL
10112 */
10113xmlChar *
10114xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10115 xmlChar *buf = NULL;
10116 int len = 0;
10117 int size = 10;
10118 xmlChar cur;
10119
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010120 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010121 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010122 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010123 return(NULL);
10124 }
10125 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010126 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010127 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010128 return(NULL);
10129 }
10130 buf[len++] = cur;
10131 NEXT;
10132 cur=CUR;
10133 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010134 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010135 return(NULL);
10136 }
10137 buf[len++] = cur;
10138 NEXT;
10139 cur=CUR;
10140 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010141 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010142 xmlChar *tmp;
10143
Owen Taylor3473f882001-02-23 17:55:21 +000010144 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010145 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10146 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010147 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010148 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010149 return(NULL);
10150 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010151 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010152 }
10153 buf[len++] = cur;
10154 NEXT;
10155 cur=CUR;
10156 }
10157 buf[len] = 0;
10158 return(buf);
10159}
10160
10161/**
10162 * xmlParseVersionInfo:
10163 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010164 *
Owen Taylor3473f882001-02-23 17:55:21 +000010165 * parse the XML version.
10166 *
10167 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010168 *
Owen Taylor3473f882001-02-23 17:55:21 +000010169 * [25] Eq ::= S? '=' S?
10170 *
10171 * Returns the version string, e.g. "1.0"
10172 */
10173
10174xmlChar *
10175xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10176 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010177
Daniel Veillarda07050d2003-10-19 14:46:32 +000010178 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010179 SKIP(7);
10180 SKIP_BLANKS;
10181 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010182 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010183 return(NULL);
10184 }
10185 NEXT;
10186 SKIP_BLANKS;
10187 if (RAW == '"') {
10188 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010189 version = xmlParseVersionNum(ctxt);
10190 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010191 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010192 } else
10193 NEXT;
10194 } else if (RAW == '\''){
10195 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010196 version = xmlParseVersionNum(ctxt);
10197 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010198 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010199 } else
10200 NEXT;
10201 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010202 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010203 }
10204 }
10205 return(version);
10206}
10207
10208/**
10209 * xmlParseEncName:
10210 * @ctxt: an XML parser context
10211 *
10212 * parse the XML encoding name
10213 *
10214 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10215 *
10216 * Returns the encoding name value or NULL
10217 */
10218xmlChar *
10219xmlParseEncName(xmlParserCtxtPtr ctxt) {
10220 xmlChar *buf = NULL;
10221 int len = 0;
10222 int size = 10;
10223 xmlChar cur;
10224
10225 cur = CUR;
10226 if (((cur >= 'a') && (cur <= 'z')) ||
10227 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010228 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010229 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010230 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010231 return(NULL);
10232 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010233
Owen Taylor3473f882001-02-23 17:55:21 +000010234 buf[len++] = cur;
10235 NEXT;
10236 cur = CUR;
10237 while (((cur >= 'a') && (cur <= 'z')) ||
10238 ((cur >= 'A') && (cur <= 'Z')) ||
10239 ((cur >= '0') && (cur <= '9')) ||
10240 (cur == '.') || (cur == '_') ||
10241 (cur == '-')) {
10242 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010243 xmlChar *tmp;
10244
Owen Taylor3473f882001-02-23 17:55:21 +000010245 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010246 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10247 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010248 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010249 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010250 return(NULL);
10251 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010252 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010253 }
10254 buf[len++] = cur;
10255 NEXT;
10256 cur = CUR;
10257 if (cur == 0) {
10258 SHRINK;
10259 GROW;
10260 cur = CUR;
10261 }
10262 }
10263 buf[len] = 0;
10264 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010265 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010266 }
10267 return(buf);
10268}
10269
10270/**
10271 * xmlParseEncodingDecl:
10272 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010273 *
Owen Taylor3473f882001-02-23 17:55:21 +000010274 * parse the XML encoding declaration
10275 *
10276 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10277 *
10278 * this setups the conversion filters.
10279 *
10280 * Returns the encoding value or NULL
10281 */
10282
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010283const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010284xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10285 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010286
10287 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010288 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010289 SKIP(8);
10290 SKIP_BLANKS;
10291 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010292 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010293 return(NULL);
10294 }
10295 NEXT;
10296 SKIP_BLANKS;
10297 if (RAW == '"') {
10298 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010299 encoding = xmlParseEncName(ctxt);
10300 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010301 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010302 } else
10303 NEXT;
10304 } else if (RAW == '\''){
10305 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010306 encoding = xmlParseEncName(ctxt);
10307 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010308 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010309 } else
10310 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010311 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010312 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010313 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010314
10315 /*
10316 * Non standard parsing, allowing the user to ignore encoding
10317 */
10318 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10319 return(encoding);
10320
Daniel Veillard6b621b82003-08-11 15:03:34 +000010321 /*
10322 * UTF-16 encoding stwich has already taken place at this stage,
10323 * more over the little-endian/big-endian selection is already done
10324 */
10325 if ((encoding != NULL) &&
10326 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10327 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010328 /*
10329 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010330 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010331 * document is apparently UTF-8 compatible, then raise an
10332 * encoding mismatch fatal error
10333 */
10334 if ((ctxt->encoding == NULL) &&
10335 (ctxt->input->buf != NULL) &&
10336 (ctxt->input->buf->encoder == NULL)) {
10337 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10338 "Document labelled UTF-16 but has UTF-8 content\n");
10339 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010340 if (ctxt->encoding != NULL)
10341 xmlFree((xmlChar *) ctxt->encoding);
10342 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010343 }
10344 /*
10345 * UTF-8 encoding is handled natively
10346 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010347 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010348 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10349 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010350 if (ctxt->encoding != NULL)
10351 xmlFree((xmlChar *) ctxt->encoding);
10352 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010353 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010354 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010355 xmlCharEncodingHandlerPtr handler;
10356
10357 if (ctxt->input->encoding != NULL)
10358 xmlFree((xmlChar *) ctxt->input->encoding);
10359 ctxt->input->encoding = encoding;
10360
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010361 handler = xmlFindCharEncodingHandler((const char *) encoding);
10362 if (handler != NULL) {
10363 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010364 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010365 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010366 "Unsupported encoding %s\n", encoding);
10367 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010368 }
10369 }
10370 }
10371 return(encoding);
10372}
10373
10374/**
10375 * xmlParseSDDecl:
10376 * @ctxt: an XML parser context
10377 *
10378 * parse the XML standalone declaration
10379 *
10380 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010381 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010382 *
10383 * [ VC: Standalone Document Declaration ]
10384 * TODO The standalone document declaration must have the value "no"
10385 * if any external markup declarations contain declarations of:
10386 * - attributes with default values, if elements to which these
10387 * attributes apply appear in the document without specifications
10388 * of values for these attributes, or
10389 * - entities (other than amp, lt, gt, apos, quot), if references
10390 * to those entities appear in the document, or
10391 * - attributes with values subject to normalization, where the
10392 * attribute appears in the document with a value which will change
10393 * as a result of normalization, or
10394 * - element types with element content, if white space occurs directly
10395 * within any instance of those types.
10396 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010397 * Returns:
10398 * 1 if standalone="yes"
10399 * 0 if standalone="no"
10400 * -2 if standalone attribute is missing or invalid
10401 * (A standalone value of -2 means that the XML declaration was found,
10402 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010403 */
10404
10405int
10406xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010407 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010408
10409 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010410 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010411 SKIP(10);
10412 SKIP_BLANKS;
10413 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010414 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010415 return(standalone);
10416 }
10417 NEXT;
10418 SKIP_BLANKS;
10419 if (RAW == '\''){
10420 NEXT;
10421 if ((RAW == 'n') && (NXT(1) == 'o')) {
10422 standalone = 0;
10423 SKIP(2);
10424 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10425 (NXT(2) == 's')) {
10426 standalone = 1;
10427 SKIP(3);
10428 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010429 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010430 }
10431 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010432 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010433 } else
10434 NEXT;
10435 } else if (RAW == '"'){
10436 NEXT;
10437 if ((RAW == 'n') && (NXT(1) == 'o')) {
10438 standalone = 0;
10439 SKIP(2);
10440 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10441 (NXT(2) == 's')) {
10442 standalone = 1;
10443 SKIP(3);
10444 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010445 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010446 }
10447 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010448 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010449 } else
10450 NEXT;
10451 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010452 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010453 }
10454 }
10455 return(standalone);
10456}
10457
10458/**
10459 * xmlParseXMLDecl:
10460 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010461 *
Owen Taylor3473f882001-02-23 17:55:21 +000010462 * parse an XML declaration header
10463 *
10464 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10465 */
10466
10467void
10468xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10469 xmlChar *version;
10470
10471 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010472 * This value for standalone indicates that the document has an
10473 * XML declaration but it does not have a standalone attribute.
10474 * It will be overwritten later if a standalone attribute is found.
10475 */
10476 ctxt->input->standalone = -2;
10477
10478 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010479 * We know that '<?xml' is here.
10480 */
10481 SKIP(5);
10482
William M. Brack76e95df2003-10-18 16:20:14 +000010483 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010484 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10485 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010486 }
10487 SKIP_BLANKS;
10488
10489 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010490 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010491 */
10492 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010493 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010494 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010495 } else {
10496 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10497 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010498 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010499 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010500 if (ctxt->options & XML_PARSE_OLD10) {
10501 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10502 "Unsupported version '%s'\n",
10503 version);
10504 } else {
10505 if ((version[0] == '1') && ((version[1] == '.'))) {
10506 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10507 "Unsupported version '%s'\n",
10508 version, NULL);
10509 } else {
10510 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10511 "Unsupported version '%s'\n",
10512 version);
10513 }
10514 }
Daniel Veillard19840942001-11-29 16:11:38 +000010515 }
10516 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010517 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010518 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010519 }
Owen Taylor3473f882001-02-23 17:55:21 +000010520
10521 /*
10522 * We may have the encoding declaration
10523 */
William M. Brack76e95df2003-10-18 16:20:14 +000010524 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010525 if ((RAW == '?') && (NXT(1) == '>')) {
10526 SKIP(2);
10527 return;
10528 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010530 }
10531 xmlParseEncodingDecl(ctxt);
10532 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10533 /*
10534 * The XML REC instructs us to stop parsing right here
10535 */
10536 return;
10537 }
10538
10539 /*
10540 * We may have the standalone status.
10541 */
William M. Brack76e95df2003-10-18 16:20:14 +000010542 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010543 if ((RAW == '?') && (NXT(1) == '>')) {
10544 SKIP(2);
10545 return;
10546 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010547 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010548 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010549
10550 /*
10551 * We can grow the input buffer freely at that point
10552 */
10553 GROW;
10554
Owen Taylor3473f882001-02-23 17:55:21 +000010555 SKIP_BLANKS;
10556 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10557
10558 SKIP_BLANKS;
10559 if ((RAW == '?') && (NXT(1) == '>')) {
10560 SKIP(2);
10561 } else if (RAW == '>') {
10562 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010563 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010564 NEXT;
10565 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010566 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010567 MOVETO_ENDTAG(CUR_PTR);
10568 NEXT;
10569 }
10570}
10571
10572/**
10573 * xmlParseMisc:
10574 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010575 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010576 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010577 *
10578 * [27] Misc ::= Comment | PI | S
10579 */
10580
10581void
10582xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010583 while ((ctxt->instate != XML_PARSER_EOF) &&
10584 (((RAW == '<') && (NXT(1) == '?')) ||
10585 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10586 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010587 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010588 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010589 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010590 NEXT;
10591 } else
10592 xmlParseComment(ctxt);
10593 }
10594}
10595
10596/**
10597 * xmlParseDocument:
10598 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010599 *
Owen Taylor3473f882001-02-23 17:55:21 +000010600 * parse an XML document (and build a tree if using the standard SAX
10601 * interface).
10602 *
10603 * [1] document ::= prolog element Misc*
10604 *
10605 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10606 *
10607 * Returns 0, -1 in case of error. the parser context is augmented
10608 * as a result of the parsing.
10609 */
10610
10611int
10612xmlParseDocument(xmlParserCtxtPtr ctxt) {
10613 xmlChar start[4];
10614 xmlCharEncoding enc;
10615
10616 xmlInitParser();
10617
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010618 if ((ctxt == NULL) || (ctxt->input == NULL))
10619 return(-1);
10620
Owen Taylor3473f882001-02-23 17:55:21 +000010621 GROW;
10622
10623 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010624 * SAX: detecting the level.
10625 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010626 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010627
10628 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010629 * SAX: beginning of the document processing.
10630 */
10631 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10632 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010633 if (ctxt->instate == XML_PARSER_EOF)
10634 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010635
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010636 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010637 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010638 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010639 * Get the 4 first bytes and decode the charset
10640 * if enc != XML_CHAR_ENCODING_NONE
10641 * plug some encoding conversion routines.
10642 */
10643 start[0] = RAW;
10644 start[1] = NXT(1);
10645 start[2] = NXT(2);
10646 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010647 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010648 if (enc != XML_CHAR_ENCODING_NONE) {
10649 xmlSwitchEncoding(ctxt, enc);
10650 }
Owen Taylor3473f882001-02-23 17:55:21 +000010651 }
10652
10653
10654 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010655 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010656 }
10657
10658 /*
10659 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010660 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010661 * than just the first line, unless the amount of data is really
10662 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010663 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010664 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10665 GROW;
10666 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010667 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010668
10669 /*
10670 * Note that we will switch encoding on the fly.
10671 */
10672 xmlParseXMLDecl(ctxt);
10673 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10674 /*
10675 * The XML REC instructs us to stop parsing right here
10676 */
10677 return(-1);
10678 }
10679 ctxt->standalone = ctxt->input->standalone;
10680 SKIP_BLANKS;
10681 } else {
10682 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10683 }
10684 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10685 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010686 if (ctxt->instate == XML_PARSER_EOF)
10687 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010688 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10689 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10690 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10691 }
Owen Taylor3473f882001-02-23 17:55:21 +000010692
10693 /*
10694 * The Misc part of the Prolog
10695 */
10696 GROW;
10697 xmlParseMisc(ctxt);
10698
10699 /*
10700 * Then possibly doc type declaration(s) and more Misc
10701 * (doctypedecl Misc*)?
10702 */
10703 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010704 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010705
10706 ctxt->inSubset = 1;
10707 xmlParseDocTypeDecl(ctxt);
10708 if (RAW == '[') {
10709 ctxt->instate = XML_PARSER_DTD;
10710 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010711 if (ctxt->instate == XML_PARSER_EOF)
10712 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010713 }
10714
10715 /*
10716 * Create and update the external subset.
10717 */
10718 ctxt->inSubset = 2;
10719 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10720 (!ctxt->disableSAX))
10721 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10722 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010723 if (ctxt->instate == XML_PARSER_EOF)
10724 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010725 ctxt->inSubset = 0;
10726
Daniel Veillardac4118d2008-01-11 05:27:32 +000010727 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010728
10729 ctxt->instate = XML_PARSER_PROLOG;
10730 xmlParseMisc(ctxt);
10731 }
10732
10733 /*
10734 * Time to start parsing the tree itself
10735 */
10736 GROW;
10737 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010738 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10739 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010740 } else {
10741 ctxt->instate = XML_PARSER_CONTENT;
10742 xmlParseElement(ctxt);
10743 ctxt->instate = XML_PARSER_EPILOG;
10744
10745
10746 /*
10747 * The Misc part at the end
10748 */
10749 xmlParseMisc(ctxt);
10750
Daniel Veillard561b7f82002-03-20 21:55:57 +000010751 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010752 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010753 }
10754 ctxt->instate = XML_PARSER_EOF;
10755 }
10756
10757 /*
10758 * SAX: end of the document processing.
10759 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010760 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010761 ctxt->sax->endDocument(ctxt->userData);
10762
Daniel Veillard5997aca2002-03-18 18:36:20 +000010763 /*
10764 * Remove locally kept entity definitions if the tree was not built
10765 */
10766 if ((ctxt->myDoc != NULL) &&
10767 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10768 xmlFreeDoc(ctxt->myDoc);
10769 ctxt->myDoc = NULL;
10770 }
10771
Daniel Veillardae0765b2008-07-31 19:54:59 +000010772 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10773 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10774 if (ctxt->valid)
10775 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10776 if (ctxt->nsWellFormed)
10777 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10778 if (ctxt->options & XML_PARSE_OLD10)
10779 ctxt->myDoc->properties |= XML_DOC_OLD10;
10780 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010781 if (! ctxt->wellFormed) {
10782 ctxt->valid = 0;
10783 return(-1);
10784 }
Owen Taylor3473f882001-02-23 17:55:21 +000010785 return(0);
10786}
10787
10788/**
10789 * xmlParseExtParsedEnt:
10790 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010791 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010792 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010793 * An external general parsed entity is well-formed if it matches the
10794 * production labeled extParsedEnt.
10795 *
10796 * [78] extParsedEnt ::= TextDecl? content
10797 *
10798 * Returns 0, -1 in case of error. the parser context is augmented
10799 * as a result of the parsing.
10800 */
10801
10802int
10803xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10804 xmlChar start[4];
10805 xmlCharEncoding enc;
10806
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010807 if ((ctxt == NULL) || (ctxt->input == NULL))
10808 return(-1);
10809
Owen Taylor3473f882001-02-23 17:55:21 +000010810 xmlDefaultSAXHandlerInit();
10811
Daniel Veillard309f81d2003-09-23 09:02:53 +000010812 xmlDetectSAX2(ctxt);
10813
Owen Taylor3473f882001-02-23 17:55:21 +000010814 GROW;
10815
10816 /*
10817 * SAX: beginning of the document processing.
10818 */
10819 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10820 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10821
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010822 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010823 * Get the 4 first bytes and decode the charset
10824 * if enc != XML_CHAR_ENCODING_NONE
10825 * plug some encoding conversion routines.
10826 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010827 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10828 start[0] = RAW;
10829 start[1] = NXT(1);
10830 start[2] = NXT(2);
10831 start[3] = NXT(3);
10832 enc = xmlDetectCharEncoding(start, 4);
10833 if (enc != XML_CHAR_ENCODING_NONE) {
10834 xmlSwitchEncoding(ctxt, enc);
10835 }
Owen Taylor3473f882001-02-23 17:55:21 +000010836 }
10837
10838
10839 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010840 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010841 }
10842
10843 /*
10844 * Check for the XMLDecl in the Prolog.
10845 */
10846 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010847 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010848
10849 /*
10850 * Note that we will switch encoding on the fly.
10851 */
10852 xmlParseXMLDecl(ctxt);
10853 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10854 /*
10855 * The XML REC instructs us to stop parsing right here
10856 */
10857 return(-1);
10858 }
10859 SKIP_BLANKS;
10860 } else {
10861 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10862 }
10863 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10864 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010865 if (ctxt->instate == XML_PARSER_EOF)
10866 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010867
10868 /*
10869 * Doing validity checking on chunk doesn't make sense
10870 */
10871 ctxt->instate = XML_PARSER_CONTENT;
10872 ctxt->validate = 0;
10873 ctxt->loadsubset = 0;
10874 ctxt->depth = 0;
10875
10876 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010877 if (ctxt->instate == XML_PARSER_EOF)
10878 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010879
Owen Taylor3473f882001-02-23 17:55:21 +000010880 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010881 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010882 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010883 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010884 }
10885
10886 /*
10887 * SAX: end of the document processing.
10888 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010889 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010890 ctxt->sax->endDocument(ctxt->userData);
10891
10892 if (! ctxt->wellFormed) return(-1);
10893 return(0);
10894}
10895
Daniel Veillard73b013f2003-09-30 12:36:01 +000010896#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010897/************************************************************************
10898 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010899 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010900 * *
10901 ************************************************************************/
10902
10903/**
10904 * xmlParseLookupSequence:
10905 * @ctxt: an XML parser context
10906 * @first: the first char to lookup
10907 * @next: the next char to lookup or zero
10908 * @third: the next char to lookup or zero
10909 *
10910 * Try to find if a sequence (first, next, third) or just (first next) or
10911 * (first) is available in the input stream.
10912 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10913 * to avoid rescanning sequences of bytes, it DOES change the state of the
10914 * parser, do not use liberally.
10915 *
10916 * Returns the index to the current parsing point if the full sequence
10917 * is available, -1 otherwise.
10918 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010919static int
Owen Taylor3473f882001-02-23 17:55:21 +000010920xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10921 xmlChar next, xmlChar third) {
10922 int base, len;
10923 xmlParserInputPtr in;
10924 const xmlChar *buf;
10925
10926 in = ctxt->input;
10927 if (in == NULL) return(-1);
10928 base = in->cur - in->base;
10929 if (base < 0) return(-1);
10930 if (ctxt->checkIndex > base)
10931 base = ctxt->checkIndex;
10932 if (in->buf == NULL) {
10933 buf = in->base;
10934 len = in->length;
10935 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010936 buf = xmlBufContent(in->buf->buffer);
10937 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010938 }
10939 /* take into account the sequence length */
10940 if (third) len -= 2;
10941 else if (next) len --;
10942 for (;base < len;base++) {
10943 if (buf[base] == first) {
10944 if (third != 0) {
10945 if ((buf[base + 1] != next) ||
10946 (buf[base + 2] != third)) continue;
10947 } else if (next != 0) {
10948 if (buf[base + 1] != next) continue;
10949 }
10950 ctxt->checkIndex = 0;
10951#ifdef DEBUG_PUSH
10952 if (next == 0)
10953 xmlGenericError(xmlGenericErrorContext,
10954 "PP: lookup '%c' found at %d\n",
10955 first, base);
10956 else if (third == 0)
10957 xmlGenericError(xmlGenericErrorContext,
10958 "PP: lookup '%c%c' found at %d\n",
10959 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010960 else
Owen Taylor3473f882001-02-23 17:55:21 +000010961 xmlGenericError(xmlGenericErrorContext,
10962 "PP: lookup '%c%c%c' found at %d\n",
10963 first, next, third, base);
10964#endif
10965 return(base - (in->cur - in->base));
10966 }
10967 }
10968 ctxt->checkIndex = base;
10969#ifdef DEBUG_PUSH
10970 if (next == 0)
10971 xmlGenericError(xmlGenericErrorContext,
10972 "PP: lookup '%c' failed\n", first);
10973 else if (third == 0)
10974 xmlGenericError(xmlGenericErrorContext,
10975 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010976 else
Owen Taylor3473f882001-02-23 17:55:21 +000010977 xmlGenericError(xmlGenericErrorContext,
10978 "PP: lookup '%c%c%c' failed\n", first, next, third);
10979#endif
10980 return(-1);
10981}
10982
10983/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010984 * xmlParseGetLasts:
10985 * @ctxt: an XML parser context
10986 * @lastlt: pointer to store the last '<' from the input
10987 * @lastgt: pointer to store the last '>' from the input
10988 *
10989 * Lookup the last < and > in the current chunk
10990 */
10991static void
10992xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10993 const xmlChar **lastgt) {
10994 const xmlChar *tmp;
10995
10996 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10997 xmlGenericError(xmlGenericErrorContext,
10998 "Internal error: xmlParseGetLasts\n");
10999 return;
11000 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011001 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011002 tmp = ctxt->input->end;
11003 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011004 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011005 if (tmp < ctxt->input->base) {
11006 *lastlt = NULL;
11007 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011008 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011009 *lastlt = tmp;
11010 tmp++;
11011 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11012 if (*tmp == '\'') {
11013 tmp++;
11014 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11015 if (tmp < ctxt->input->end) tmp++;
11016 } else if (*tmp == '"') {
11017 tmp++;
11018 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11019 if (tmp < ctxt->input->end) tmp++;
11020 } else
11021 tmp++;
11022 }
11023 if (tmp < ctxt->input->end)
11024 *lastgt = tmp;
11025 else {
11026 tmp = *lastlt;
11027 tmp--;
11028 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11029 if (tmp >= ctxt->input->base)
11030 *lastgt = tmp;
11031 else
11032 *lastgt = NULL;
11033 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011034 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011035 } else {
11036 *lastlt = NULL;
11037 *lastgt = NULL;
11038 }
11039}
11040/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011041 * xmlCheckCdataPush:
11042 * @cur: pointer to the bock of characters
11043 * @len: length of the block in bytes
11044 *
11045 * Check that the block of characters is okay as SCdata content [20]
11046 *
11047 * Returns the number of bytes to pass if okay, a negative index where an
11048 * UTF-8 error occured otherwise
11049 */
11050static int
11051xmlCheckCdataPush(const xmlChar *utf, int len) {
11052 int ix;
11053 unsigned char c;
11054 int codepoint;
11055
11056 if ((utf == NULL) || (len <= 0))
11057 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011058
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011059 for (ix = 0; ix < len;) { /* string is 0-terminated */
11060 c = utf[ix];
11061 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11062 if (c >= 0x20)
11063 ix++;
11064 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11065 ix++;
11066 else
11067 return(-ix);
11068 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11069 if (ix + 2 > len) return(ix);
11070 if ((utf[ix+1] & 0xc0 ) != 0x80)
11071 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011072 codepoint = (utf[ix] & 0x1f) << 6;
11073 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011074 if (!xmlIsCharQ(codepoint))
11075 return(-ix);
11076 ix += 2;
11077 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11078 if (ix + 3 > len) return(ix);
11079 if (((utf[ix+1] & 0xc0) != 0x80) ||
11080 ((utf[ix+2] & 0xc0) != 0x80))
11081 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011082 codepoint = (utf[ix] & 0xf) << 12;
11083 codepoint |= (utf[ix+1] & 0x3f) << 6;
11084 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011085 if (!xmlIsCharQ(codepoint))
11086 return(-ix);
11087 ix += 3;
11088 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11089 if (ix + 4 > len) return(ix);
11090 if (((utf[ix+1] & 0xc0) != 0x80) ||
11091 ((utf[ix+2] & 0xc0) != 0x80) ||
11092 ((utf[ix+3] & 0xc0) != 0x80))
11093 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011094 codepoint = (utf[ix] & 0x7) << 18;
11095 codepoint |= (utf[ix+1] & 0x3f) << 12;
11096 codepoint |= (utf[ix+2] & 0x3f) << 6;
11097 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011098 if (!xmlIsCharQ(codepoint))
11099 return(-ix);
11100 ix += 4;
11101 } else /* unknown encoding */
11102 return(-ix);
11103 }
11104 return(ix);
11105}
11106
11107/**
Owen Taylor3473f882001-02-23 17:55:21 +000011108 * xmlParseTryOrFinish:
11109 * @ctxt: an XML parser context
11110 * @terminate: last chunk indicator
11111 *
11112 * Try to progress on parsing
11113 *
11114 * Returns zero if no parsing was possible
11115 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011116static int
Owen Taylor3473f882001-02-23 17:55:21 +000011117xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11118 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011119 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011120 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011121 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011122
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011123 if (ctxt->input == NULL)
11124 return(0);
11125
Owen Taylor3473f882001-02-23 17:55:21 +000011126#ifdef DEBUG_PUSH
11127 switch (ctxt->instate) {
11128 case XML_PARSER_EOF:
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: try EOF\n"); break;
11131 case XML_PARSER_START:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try START\n"); break;
11134 case XML_PARSER_MISC:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try MISC\n");break;
11137 case XML_PARSER_COMMENT:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try COMMENT\n");break;
11140 case XML_PARSER_PROLOG:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try PROLOG\n");break;
11143 case XML_PARSER_START_TAG:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try START_TAG\n");break;
11146 case XML_PARSER_CONTENT:
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: try CONTENT\n");break;
11149 case XML_PARSER_CDATA_SECTION:
11150 xmlGenericError(xmlGenericErrorContext,
11151 "PP: try CDATA_SECTION\n");break;
11152 case XML_PARSER_END_TAG:
11153 xmlGenericError(xmlGenericErrorContext,
11154 "PP: try END_TAG\n");break;
11155 case XML_PARSER_ENTITY_DECL:
11156 xmlGenericError(xmlGenericErrorContext,
11157 "PP: try ENTITY_DECL\n");break;
11158 case XML_PARSER_ENTITY_VALUE:
11159 xmlGenericError(xmlGenericErrorContext,
11160 "PP: try ENTITY_VALUE\n");break;
11161 case XML_PARSER_ATTRIBUTE_VALUE:
11162 xmlGenericError(xmlGenericErrorContext,
11163 "PP: try ATTRIBUTE_VALUE\n");break;
11164 case XML_PARSER_DTD:
11165 xmlGenericError(xmlGenericErrorContext,
11166 "PP: try DTD\n");break;
11167 case XML_PARSER_EPILOG:
11168 xmlGenericError(xmlGenericErrorContext,
11169 "PP: try EPILOG\n");break;
11170 case XML_PARSER_PI:
11171 xmlGenericError(xmlGenericErrorContext,
11172 "PP: try PI\n");break;
11173 case XML_PARSER_IGNORE:
11174 xmlGenericError(xmlGenericErrorContext,
11175 "PP: try IGNORE\n");break;
11176 }
11177#endif
11178
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011179 if ((ctxt->input != NULL) &&
11180 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011181 xmlSHRINK(ctxt);
11182 ctxt->checkIndex = 0;
11183 }
11184 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011185
Daniel Veillarde50ba812013-04-11 15:54:51 +080011186 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011187 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011188 return(0);
11189
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011190
Owen Taylor3473f882001-02-23 17:55:21 +000011191 /*
11192 * Pop-up of finished entities.
11193 */
11194 while ((RAW == 0) && (ctxt->inputNr > 1))
11195 xmlPopInput(ctxt);
11196
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011197 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011198 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011199 avail = ctxt->input->length -
11200 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011201 else {
11202 /*
11203 * If we are operating on converted input, try to flush
11204 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011205 * buffer. But do not do this in document start where
11206 * encoding="..." may not have been read and we work on a
11207 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011208 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011209 if ((ctxt->instate != XML_PARSER_START) &&
11210 (ctxt->input->buf->raw != NULL) &&
11211 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011212 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11213 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011214 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011215
11216 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011217 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11218 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011219 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011220 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011221 (ctxt->input->cur - ctxt->input->base);
11222 }
Owen Taylor3473f882001-02-23 17:55:21 +000011223 if (avail < 1)
11224 goto done;
11225 switch (ctxt->instate) {
11226 case XML_PARSER_EOF:
11227 /*
11228 * Document parsing is done !
11229 */
11230 goto done;
11231 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011232 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11233 xmlChar start[4];
11234 xmlCharEncoding enc;
11235
11236 /*
11237 * Very first chars read from the document flow.
11238 */
11239 if (avail < 4)
11240 goto done;
11241
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011242 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011243 * Get the 4 first bytes and decode the charset
11244 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011245 * plug some encoding conversion routines,
11246 * else xmlSwitchEncoding will set to (default)
11247 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011248 */
11249 start[0] = RAW;
11250 start[1] = NXT(1);
11251 start[2] = NXT(2);
11252 start[3] = NXT(3);
11253 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011254 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011255 break;
11256 }
Owen Taylor3473f882001-02-23 17:55:21 +000011257
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011258 if (avail < 2)
11259 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011260 cur = ctxt->input->cur[0];
11261 next = ctxt->input->cur[1];
11262 if (cur == 0) {
11263 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11264 ctxt->sax->setDocumentLocator(ctxt->userData,
11265 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011266 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011267 ctxt->instate = XML_PARSER_EOF;
11268#ifdef DEBUG_PUSH
11269 xmlGenericError(xmlGenericErrorContext,
11270 "PP: entering EOF\n");
11271#endif
11272 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11273 ctxt->sax->endDocument(ctxt->userData);
11274 goto done;
11275 }
11276 if ((cur == '<') && (next == '?')) {
11277 /* PI or XML decl */
11278 if (avail < 5) return(ret);
11279 if ((!terminate) &&
11280 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11281 return(ret);
11282 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11283 ctxt->sax->setDocumentLocator(ctxt->userData,
11284 &xmlDefaultSAXLocator);
11285 if ((ctxt->input->cur[2] == 'x') &&
11286 (ctxt->input->cur[3] == 'm') &&
11287 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011288 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011289 ret += 5;
11290#ifdef DEBUG_PUSH
11291 xmlGenericError(xmlGenericErrorContext,
11292 "PP: Parsing XML Decl\n");
11293#endif
11294 xmlParseXMLDecl(ctxt);
11295 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11296 /*
11297 * The XML REC instructs us to stop parsing right
11298 * here
11299 */
11300 ctxt->instate = XML_PARSER_EOF;
11301 return(0);
11302 }
11303 ctxt->standalone = ctxt->input->standalone;
11304 if ((ctxt->encoding == NULL) &&
11305 (ctxt->input->encoding != NULL))
11306 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11307 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11308 (!ctxt->disableSAX))
11309 ctxt->sax->startDocument(ctxt->userData);
11310 ctxt->instate = XML_PARSER_MISC;
11311#ifdef DEBUG_PUSH
11312 xmlGenericError(xmlGenericErrorContext,
11313 "PP: entering MISC\n");
11314#endif
11315 } else {
11316 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11317 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11318 (!ctxt->disableSAX))
11319 ctxt->sax->startDocument(ctxt->userData);
11320 ctxt->instate = XML_PARSER_MISC;
11321#ifdef DEBUG_PUSH
11322 xmlGenericError(xmlGenericErrorContext,
11323 "PP: entering MISC\n");
11324#endif
11325 }
11326 } else {
11327 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11328 ctxt->sax->setDocumentLocator(ctxt->userData,
11329 &xmlDefaultSAXLocator);
11330 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011331 if (ctxt->version == NULL) {
11332 xmlErrMemory(ctxt, NULL);
11333 break;
11334 }
Owen Taylor3473f882001-02-23 17:55:21 +000011335 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11336 (!ctxt->disableSAX))
11337 ctxt->sax->startDocument(ctxt->userData);
11338 ctxt->instate = XML_PARSER_MISC;
11339#ifdef DEBUG_PUSH
11340 xmlGenericError(xmlGenericErrorContext,
11341 "PP: entering MISC\n");
11342#endif
11343 }
11344 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011345 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011346 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011347 const xmlChar *prefix = NULL;
11348 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011349 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011350
11351 if ((avail < 2) && (ctxt->inputNr == 1))
11352 goto done;
11353 cur = ctxt->input->cur[0];
11354 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011355 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011356 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011357 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11358 ctxt->sax->endDocument(ctxt->userData);
11359 goto done;
11360 }
11361 if (!terminate) {
11362 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011363 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011364 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011365 goto done;
11366 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11367 goto done;
11368 }
11369 }
11370 if (ctxt->spaceNr == 0)
11371 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011372 else if (*ctxt->space == -2)
11373 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011374 else
11375 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011376#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011377 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011378#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011379 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011380#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011381 else
11382 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011383#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011384 if (ctxt->instate == XML_PARSER_EOF)
11385 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011386 if (name == NULL) {
11387 spacePop(ctxt);
11388 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011389 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11390 ctxt->sax->endDocument(ctxt->userData);
11391 goto done;
11392 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011393#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011394 /*
11395 * [ VC: Root Element Type ]
11396 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011397 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011398 */
11399 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11400 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11401 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011402#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011403
11404 /*
11405 * Check for an Empty Element.
11406 */
11407 if ((RAW == '/') && (NXT(1) == '>')) {
11408 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011409
11410 if (ctxt->sax2) {
11411 if ((ctxt->sax != NULL) &&
11412 (ctxt->sax->endElementNs != NULL) &&
11413 (!ctxt->disableSAX))
11414 ctxt->sax->endElementNs(ctxt->userData, name,
11415 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011416 if (ctxt->nsNr - nsNr > 0)
11417 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011418#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011419 } else {
11420 if ((ctxt->sax != NULL) &&
11421 (ctxt->sax->endElement != NULL) &&
11422 (!ctxt->disableSAX))
11423 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011424#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011425 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011426 if (ctxt->instate == XML_PARSER_EOF)
11427 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011428 spacePop(ctxt);
11429 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011430 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011431 } else {
11432 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011433 }
Daniel Veillard65686452012-07-19 18:25:01 +080011434 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011435 break;
11436 }
11437 if (RAW == '>') {
11438 NEXT;
11439 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011440 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011441 "Couldn't find end of Start Tag %s\n",
11442 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011443 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011444 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011445 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011446 if (ctxt->sax2)
11447 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011448#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011449 else
11450 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011451#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011452
Daniel Veillarda880b122003-04-21 21:36:41 +000011453 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011454 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011455 break;
11456 }
11457 case XML_PARSER_CONTENT: {
11458 const xmlChar *test;
11459 unsigned int cons;
11460 if ((avail < 2) && (ctxt->inputNr == 1))
11461 goto done;
11462 cur = ctxt->input->cur[0];
11463 next = ctxt->input->cur[1];
11464
11465 test = CUR_PTR;
11466 cons = ctxt->input->consumed;
11467 if ((cur == '<') && (next == '/')) {
11468 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011469 break;
11470 } else if ((cur == '<') && (next == '?')) {
11471 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011472 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11473 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011474 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011475 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011476 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011477 ctxt->instate = XML_PARSER_CONTENT;
11478 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011479 } else if ((cur == '<') && (next != '!')) {
11480 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011481 break;
11482 } else if ((cur == '<') && (next == '!') &&
11483 (ctxt->input->cur[2] == '-') &&
11484 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011485 int term;
11486
11487 if (avail < 4)
11488 goto done;
11489 ctxt->input->cur += 4;
11490 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11491 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011492 if ((!terminate) && (term < 0)) {
11493 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011494 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011495 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011496 xmlParseComment(ctxt);
11497 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011498 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011499 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11500 (ctxt->input->cur[2] == '[') &&
11501 (ctxt->input->cur[3] == 'C') &&
11502 (ctxt->input->cur[4] == 'D') &&
11503 (ctxt->input->cur[5] == 'A') &&
11504 (ctxt->input->cur[6] == 'T') &&
11505 (ctxt->input->cur[7] == 'A') &&
11506 (ctxt->input->cur[8] == '[')) {
11507 SKIP(9);
11508 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011509 break;
11510 } else if ((cur == '<') && (next == '!') &&
11511 (avail < 9)) {
11512 goto done;
11513 } else if (cur == '&') {
11514 if ((!terminate) &&
11515 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11516 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011517 xmlParseReference(ctxt);
11518 } else {
11519 /* TODO Avoid the extra copy, handle directly !!! */
11520 /*
11521 * Goal of the following test is:
11522 * - minimize calls to the SAX 'character' callback
11523 * when they are mergeable
11524 * - handle a problem for isBlank when we only parse
11525 * a sequence of blank chars and the next one is
11526 * not available to check against '<' presence.
11527 * - tries to homogenize the differences in SAX
11528 * callbacks between the push and pull versions
11529 * of the parser.
11530 */
11531 if ((ctxt->inputNr == 1) &&
11532 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11533 if (!terminate) {
11534 if (ctxt->progressive) {
11535 if ((lastlt == NULL) ||
11536 (ctxt->input->cur > lastlt))
11537 goto done;
11538 } else if (xmlParseLookupSequence(ctxt,
11539 '<', 0, 0) < 0) {
11540 goto done;
11541 }
11542 }
11543 }
11544 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011545 xmlParseCharData(ctxt, 0);
11546 }
11547 /*
11548 * Pop-up of finished entities.
11549 */
11550 while ((RAW == 0) && (ctxt->inputNr > 1))
11551 xmlPopInput(ctxt);
11552 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011553 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11554 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011555 ctxt->instate = XML_PARSER_EOF;
11556 break;
11557 }
11558 break;
11559 }
11560 case XML_PARSER_END_TAG:
11561 if (avail < 2)
11562 goto done;
11563 if (!terminate) {
11564 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011565 /* > can be found unescaped in attribute values */
11566 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011567 goto done;
11568 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11569 goto done;
11570 }
11571 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011572 if (ctxt->sax2) {
11573 xmlParseEndTag2(ctxt,
11574 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11575 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011576 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011577 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011578 }
11579#ifdef LIBXML_SAX1_ENABLED
11580 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011581 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011582#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011583 if (ctxt->instate == XML_PARSER_EOF) {
11584 /* Nothing */
11585 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011586 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011587 } else {
11588 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011589 }
11590 break;
11591 case XML_PARSER_CDATA_SECTION: {
11592 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011593 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011594 * cdataBlock merge back contiguous callbacks.
11595 */
11596 int base;
11597
11598 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11599 if (base < 0) {
11600 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011601 int tmp;
11602
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011603 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011604 XML_PARSER_BIG_BUFFER_SIZE);
11605 if (tmp < 0) {
11606 tmp = -tmp;
11607 ctxt->input->cur += tmp;
11608 goto encoding_error;
11609 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011610 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11611 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011612 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011613 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011614 else if (ctxt->sax->characters != NULL)
11615 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011616 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011617 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011618 if (ctxt->instate == XML_PARSER_EOF)
11619 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011620 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011621 ctxt->checkIndex = 0;
11622 }
11623 goto done;
11624 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011625 int tmp;
11626
11627 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11628 if ((tmp < 0) || (tmp != base)) {
11629 tmp = -tmp;
11630 ctxt->input->cur += tmp;
11631 goto encoding_error;
11632 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011633 if ((ctxt->sax != NULL) && (base == 0) &&
11634 (ctxt->sax->cdataBlock != NULL) &&
11635 (!ctxt->disableSAX)) {
11636 /*
11637 * Special case to provide identical behaviour
11638 * between pull and push parsers on empty CDATA
11639 * sections
11640 */
11641 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11642 (!strncmp((const char *)&ctxt->input->cur[-9],
11643 "<![CDATA[", 9)))
11644 ctxt->sax->cdataBlock(ctxt->userData,
11645 BAD_CAST "", 0);
11646 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011647 (!ctxt->disableSAX)) {
11648 if (ctxt->sax->cdataBlock != NULL)
11649 ctxt->sax->cdataBlock(ctxt->userData,
11650 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011651 else if (ctxt->sax->characters != NULL)
11652 ctxt->sax->characters(ctxt->userData,
11653 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011654 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011655 if (ctxt->instate == XML_PARSER_EOF)
11656 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011657 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011658 ctxt->checkIndex = 0;
11659 ctxt->instate = XML_PARSER_CONTENT;
11660#ifdef DEBUG_PUSH
11661 xmlGenericError(xmlGenericErrorContext,
11662 "PP: entering CONTENT\n");
11663#endif
11664 }
11665 break;
11666 }
Owen Taylor3473f882001-02-23 17:55:21 +000011667 case XML_PARSER_MISC:
11668 SKIP_BLANKS;
11669 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011670 avail = ctxt->input->length -
11671 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011672 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011673 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011674 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011675 if (avail < 2)
11676 goto done;
11677 cur = ctxt->input->cur[0];
11678 next = ctxt->input->cur[1];
11679 if ((cur == '<') && (next == '?')) {
11680 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011681 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11682 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011683 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011684 }
Owen Taylor3473f882001-02-23 17:55:21 +000011685#ifdef DEBUG_PUSH
11686 xmlGenericError(xmlGenericErrorContext,
11687 "PP: Parsing PI\n");
11688#endif
11689 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011690 if (ctxt->instate == XML_PARSER_EOF)
11691 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011692 ctxt->instate = XML_PARSER_MISC;
11693 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011694 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011695 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011696 (ctxt->input->cur[2] == '-') &&
11697 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011698 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011699 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11700 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011701 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011702 }
Owen Taylor3473f882001-02-23 17:55:21 +000011703#ifdef DEBUG_PUSH
11704 xmlGenericError(xmlGenericErrorContext,
11705 "PP: Parsing Comment\n");
11706#endif
11707 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011708 if (ctxt->instate == XML_PARSER_EOF)
11709 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011710 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011711 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011712 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011713 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011714 (ctxt->input->cur[2] == 'D') &&
11715 (ctxt->input->cur[3] == 'O') &&
11716 (ctxt->input->cur[4] == 'C') &&
11717 (ctxt->input->cur[5] == 'T') &&
11718 (ctxt->input->cur[6] == 'Y') &&
11719 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011720 (ctxt->input->cur[8] == 'E')) {
11721 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011722 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11723 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011724 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011725 }
Owen Taylor3473f882001-02-23 17:55:21 +000011726#ifdef DEBUG_PUSH
11727 xmlGenericError(xmlGenericErrorContext,
11728 "PP: Parsing internal subset\n");
11729#endif
11730 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011731 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011732 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011733 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011734 if (ctxt->instate == XML_PARSER_EOF)
11735 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011736 if (RAW == '[') {
11737 ctxt->instate = XML_PARSER_DTD;
11738#ifdef DEBUG_PUSH
11739 xmlGenericError(xmlGenericErrorContext,
11740 "PP: entering DTD\n");
11741#endif
11742 } else {
11743 /*
11744 * Create and update the external subset.
11745 */
11746 ctxt->inSubset = 2;
11747 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11748 (ctxt->sax->externalSubset != NULL))
11749 ctxt->sax->externalSubset(ctxt->userData,
11750 ctxt->intSubName, ctxt->extSubSystem,
11751 ctxt->extSubURI);
11752 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011753 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011754 ctxt->instate = XML_PARSER_PROLOG;
11755#ifdef DEBUG_PUSH
11756 xmlGenericError(xmlGenericErrorContext,
11757 "PP: entering PROLOG\n");
11758#endif
11759 }
11760 } else if ((cur == '<') && (next == '!') &&
11761 (avail < 9)) {
11762 goto done;
11763 } else {
11764 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011765 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011766 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011767#ifdef DEBUG_PUSH
11768 xmlGenericError(xmlGenericErrorContext,
11769 "PP: entering START_TAG\n");
11770#endif
11771 }
11772 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011773 case XML_PARSER_PROLOG:
11774 SKIP_BLANKS;
11775 if (ctxt->input->buf == NULL)
11776 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11777 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011778 avail = xmlBufUse(ctxt->input->buf->buffer) -
11779 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011780 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011781 goto done;
11782 cur = ctxt->input->cur[0];
11783 next = ctxt->input->cur[1];
11784 if ((cur == '<') && (next == '?')) {
11785 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011786 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11787 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011788 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011789 }
Owen Taylor3473f882001-02-23 17:55:21 +000011790#ifdef DEBUG_PUSH
11791 xmlGenericError(xmlGenericErrorContext,
11792 "PP: Parsing PI\n");
11793#endif
11794 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011795 if (ctxt->instate == XML_PARSER_EOF)
11796 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011797 ctxt->instate = XML_PARSER_PROLOG;
11798 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011799 } else if ((cur == '<') && (next == '!') &&
11800 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11801 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011802 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11803 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011804 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011805 }
Owen Taylor3473f882001-02-23 17:55:21 +000011806#ifdef DEBUG_PUSH
11807 xmlGenericError(xmlGenericErrorContext,
11808 "PP: Parsing Comment\n");
11809#endif
11810 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011811 if (ctxt->instate == XML_PARSER_EOF)
11812 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011813 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011814 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011815 } else if ((cur == '<') && (next == '!') &&
11816 (avail < 4)) {
11817 goto done;
11818 } else {
11819 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011820 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011821 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011822 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011823#ifdef DEBUG_PUSH
11824 xmlGenericError(xmlGenericErrorContext,
11825 "PP: entering START_TAG\n");
11826#endif
11827 }
11828 break;
11829 case XML_PARSER_EPILOG:
11830 SKIP_BLANKS;
11831 if (ctxt->input->buf == NULL)
11832 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11833 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011834 avail = xmlBufUse(ctxt->input->buf->buffer) -
11835 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011836 if (avail < 2)
11837 goto done;
11838 cur = ctxt->input->cur[0];
11839 next = ctxt->input->cur[1];
11840 if ((cur == '<') && (next == '?')) {
11841 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011842 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11843 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011844 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011845 }
Owen Taylor3473f882001-02-23 17:55:21 +000011846#ifdef DEBUG_PUSH
11847 xmlGenericError(xmlGenericErrorContext,
11848 "PP: Parsing PI\n");
11849#endif
11850 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011851 if (ctxt->instate == XML_PARSER_EOF)
11852 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011853 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011854 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011855 } else if ((cur == '<') && (next == '!') &&
11856 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11857 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011858 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11859 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011860 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011861 }
Owen Taylor3473f882001-02-23 17:55:21 +000011862#ifdef DEBUG_PUSH
11863 xmlGenericError(xmlGenericErrorContext,
11864 "PP: Parsing Comment\n");
11865#endif
11866 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011867 if (ctxt->instate == XML_PARSER_EOF)
11868 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011869 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011870 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011871 } else if ((cur == '<') && (next == '!') &&
11872 (avail < 4)) {
11873 goto done;
11874 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011875 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011876 ctxt->instate = XML_PARSER_EOF;
11877#ifdef DEBUG_PUSH
11878 xmlGenericError(xmlGenericErrorContext,
11879 "PP: entering EOF\n");
11880#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011881 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011882 ctxt->sax->endDocument(ctxt->userData);
11883 goto done;
11884 }
11885 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011886 case XML_PARSER_DTD: {
11887 /*
11888 * Sorry but progressive parsing of the internal subset
11889 * is not expected to be supported. We first check that
11890 * the full content of the internal subset is available and
11891 * the parsing is launched only at that point.
11892 * Internal subset ends up with "']' S? '>'" in an unescaped
11893 * section and not in a ']]>' sequence which are conditional
11894 * sections (whoever argued to keep that crap in XML deserve
11895 * a place in hell !).
11896 */
11897 int base, i;
11898 xmlChar *buf;
11899 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011900 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011901
11902 base = ctxt->input->cur - ctxt->input->base;
11903 if (base < 0) return(0);
11904 if (ctxt->checkIndex > base)
11905 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011906 buf = xmlBufContent(ctxt->input->buf->buffer);
11907 use = xmlBufUse(ctxt->input->buf->buffer);
11908 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011909 if (quote != 0) {
11910 if (buf[base] == quote)
11911 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011912 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011913 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011914 if ((quote == 0) && (buf[base] == '<')) {
11915 int found = 0;
11916 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011917 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011918 (buf[base + 1] == '!') &&
11919 (buf[base + 2] == '-') &&
11920 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011921 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011922 if ((buf[base] == '-') &&
11923 (buf[base + 1] == '-') &&
11924 (buf[base + 2] == '>')) {
11925 found = 1;
11926 base += 2;
11927 break;
11928 }
11929 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011930 if (!found) {
11931#if 0
11932 fprintf(stderr, "unfinished comment\n");
11933#endif
11934 break; /* for */
11935 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011936 continue;
11937 }
11938 }
Owen Taylor3473f882001-02-23 17:55:21 +000011939 if (buf[base] == '"') {
11940 quote = '"';
11941 continue;
11942 }
11943 if (buf[base] == '\'') {
11944 quote = '\'';
11945 continue;
11946 }
11947 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011948#if 0
11949 fprintf(stderr, "%c%c%c%c: ", buf[base],
11950 buf[base + 1], buf[base + 2], buf[base + 3]);
11951#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011952 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011953 break;
11954 if (buf[base + 1] == ']') {
11955 /* conditional crap, skip both ']' ! */
11956 base++;
11957 continue;
11958 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011959 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011960 if (buf[base + i] == '>') {
11961#if 0
11962 fprintf(stderr, "found\n");
11963#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011964 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011965 }
11966 if (!IS_BLANK_CH(buf[base + i])) {
11967#if 0
11968 fprintf(stderr, "not found\n");
11969#endif
11970 goto not_end_of_int_subset;
11971 }
Owen Taylor3473f882001-02-23 17:55:21 +000011972 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011973#if 0
11974 fprintf(stderr, "end of stream\n");
11975#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011976 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011977
Owen Taylor3473f882001-02-23 17:55:21 +000011978 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011979not_end_of_int_subset:
11980 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011981 }
11982 /*
11983 * We didn't find the end of the Internal subset
11984 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011985 if (quote == 0)
11986 ctxt->checkIndex = base;
11987 else
11988 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011989#ifdef DEBUG_PUSH
11990 if (next == 0)
11991 xmlGenericError(xmlGenericErrorContext,
11992 "PP: lookup of int subset end filed\n");
11993#endif
11994 goto done;
11995
11996found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011997 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011998 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011999 if (ctxt->instate == XML_PARSER_EOF)
12000 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012001 ctxt->inSubset = 2;
12002 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12003 (ctxt->sax->externalSubset != NULL))
12004 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12005 ctxt->extSubSystem, ctxt->extSubURI);
12006 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012007 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012008 if (ctxt->instate == XML_PARSER_EOF)
12009 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012010 ctxt->instate = XML_PARSER_PROLOG;
12011 ctxt->checkIndex = 0;
12012#ifdef DEBUG_PUSH
12013 xmlGenericError(xmlGenericErrorContext,
12014 "PP: entering PROLOG\n");
12015#endif
12016 break;
12017 }
12018 case XML_PARSER_COMMENT:
12019 xmlGenericError(xmlGenericErrorContext,
12020 "PP: internal error, state == COMMENT\n");
12021 ctxt->instate = XML_PARSER_CONTENT;
12022#ifdef DEBUG_PUSH
12023 xmlGenericError(xmlGenericErrorContext,
12024 "PP: entering CONTENT\n");
12025#endif
12026 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012027 case XML_PARSER_IGNORE:
12028 xmlGenericError(xmlGenericErrorContext,
12029 "PP: internal error, state == IGNORE");
12030 ctxt->instate = XML_PARSER_DTD;
12031#ifdef DEBUG_PUSH
12032 xmlGenericError(xmlGenericErrorContext,
12033 "PP: entering DTD\n");
12034#endif
12035 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012036 case XML_PARSER_PI:
12037 xmlGenericError(xmlGenericErrorContext,
12038 "PP: internal error, state == PI\n");
12039 ctxt->instate = XML_PARSER_CONTENT;
12040#ifdef DEBUG_PUSH
12041 xmlGenericError(xmlGenericErrorContext,
12042 "PP: entering CONTENT\n");
12043#endif
12044 break;
12045 case XML_PARSER_ENTITY_DECL:
12046 xmlGenericError(xmlGenericErrorContext,
12047 "PP: internal error, state == ENTITY_DECL\n");
12048 ctxt->instate = XML_PARSER_DTD;
12049#ifdef DEBUG_PUSH
12050 xmlGenericError(xmlGenericErrorContext,
12051 "PP: entering DTD\n");
12052#endif
12053 break;
12054 case XML_PARSER_ENTITY_VALUE:
12055 xmlGenericError(xmlGenericErrorContext,
12056 "PP: internal error, state == ENTITY_VALUE\n");
12057 ctxt->instate = XML_PARSER_CONTENT;
12058#ifdef DEBUG_PUSH
12059 xmlGenericError(xmlGenericErrorContext,
12060 "PP: entering DTD\n");
12061#endif
12062 break;
12063 case XML_PARSER_ATTRIBUTE_VALUE:
12064 xmlGenericError(xmlGenericErrorContext,
12065 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12066 ctxt->instate = XML_PARSER_START_TAG;
12067#ifdef DEBUG_PUSH
12068 xmlGenericError(xmlGenericErrorContext,
12069 "PP: entering START_TAG\n");
12070#endif
12071 break;
12072 case XML_PARSER_SYSTEM_LITERAL:
12073 xmlGenericError(xmlGenericErrorContext,
12074 "PP: internal error, state == SYSTEM_LITERAL\n");
12075 ctxt->instate = XML_PARSER_START_TAG;
12076#ifdef DEBUG_PUSH
12077 xmlGenericError(xmlGenericErrorContext,
12078 "PP: entering START_TAG\n");
12079#endif
12080 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012081 case XML_PARSER_PUBLIC_LITERAL:
12082 xmlGenericError(xmlGenericErrorContext,
12083 "PP: internal error, state == PUBLIC_LITERAL\n");
12084 ctxt->instate = XML_PARSER_START_TAG;
12085#ifdef DEBUG_PUSH
12086 xmlGenericError(xmlGenericErrorContext,
12087 "PP: entering START_TAG\n");
12088#endif
12089 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012090 }
12091 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012092done:
Owen Taylor3473f882001-02-23 17:55:21 +000012093#ifdef DEBUG_PUSH
12094 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12095#endif
12096 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012097encoding_error:
12098 {
12099 char buffer[150];
12100
12101 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12102 ctxt->input->cur[0], ctxt->input->cur[1],
12103 ctxt->input->cur[2], ctxt->input->cur[3]);
12104 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12105 "Input is not proper UTF-8, indicate encoding !\n%s",
12106 BAD_CAST buffer, NULL);
12107 }
12108 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012109}
12110
12111/**
Daniel Veillard65686452012-07-19 18:25:01 +080012112 * xmlParseCheckTransition:
12113 * @ctxt: an XML parser context
12114 * @chunk: a char array
12115 * @size: the size in byte of the chunk
12116 *
12117 * Check depending on the current parser state if the chunk given must be
12118 * processed immediately or one need more data to advance on parsing.
12119 *
12120 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12121 */
12122static int
12123xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12124 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12125 return(-1);
12126 if (ctxt->instate == XML_PARSER_START_TAG) {
12127 if (memchr(chunk, '>', size) != NULL)
12128 return(1);
12129 return(0);
12130 }
12131 if (ctxt->progressive == XML_PARSER_COMMENT) {
12132 if (memchr(chunk, '>', size) != NULL)
12133 return(1);
12134 return(0);
12135 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012136 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12137 if (memchr(chunk, '>', size) != NULL)
12138 return(1);
12139 return(0);
12140 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012141 if (ctxt->progressive == XML_PARSER_PI) {
12142 if (memchr(chunk, '>', size) != NULL)
12143 return(1);
12144 return(0);
12145 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012146 if (ctxt->instate == XML_PARSER_END_TAG) {
12147 if (memchr(chunk, '>', size) != NULL)
12148 return(1);
12149 return(0);
12150 }
12151 if ((ctxt->progressive == XML_PARSER_DTD) ||
12152 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012153 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012154 return(1);
12155 return(0);
12156 }
Daniel Veillard65686452012-07-19 18:25:01 +080012157 return(1);
12158}
12159
12160/**
Owen Taylor3473f882001-02-23 17:55:21 +000012161 * xmlParseChunk:
12162 * @ctxt: an XML parser context
12163 * @chunk: an char array
12164 * @size: the size in byte of the chunk
12165 * @terminate: last chunk indicator
12166 *
12167 * Parse a Chunk of memory
12168 *
12169 * Returns zero if no error, the xmlParserErrors otherwise.
12170 */
12171int
12172xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12173 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012174 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012175 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012176 size_t old_avail = 0;
12177 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012178
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012179 if (ctxt == NULL)
12180 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012181 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012182 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012183 if (ctxt->instate == XML_PARSER_EOF)
12184 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012185 if (ctxt->instate == XML_PARSER_START)
12186 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012187 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12188 (chunk[size - 1] == '\r')) {
12189 end_in_lf = 1;
12190 size--;
12191 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012192
12193xmldecl_done:
12194
Owen Taylor3473f882001-02-23 17:55:21 +000012195 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12196 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012197 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12198 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012199 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012200
Daniel Veillard65686452012-07-19 18:25:01 +080012201 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012202 /*
12203 * Specific handling if we autodetected an encoding, we should not
12204 * push more than the first line ... which depend on the encoding
12205 * And only push the rest once the final encoding was detected
12206 */
12207 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12208 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012209 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012210
12211 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12212 BAD_CAST "UTF-16")) ||
12213 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12214 BAD_CAST "UTF16")))
12215 len = 90;
12216 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12217 BAD_CAST "UCS-4")) ||
12218 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12219 BAD_CAST "UCS4")))
12220 len = 180;
12221
12222 if (ctxt->input->buf->rawconsumed < len)
12223 len -= ctxt->input->buf->rawconsumed;
12224
Raul Hudeaba9716a2010-03-15 10:13:29 +010012225 /*
12226 * Change size for reading the initial declaration only
12227 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12228 * will blindly copy extra bytes from memory.
12229 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012230 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012231 remain = size - len;
12232 size = len;
12233 } else {
12234 remain = 0;
12235 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012236 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012237 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012238 if (res < 0) {
12239 ctxt->errNo = XML_PARSER_EOF;
12240 ctxt->disableSAX = 1;
12241 return (XML_PARSER_EOF);
12242 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012243 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012244#ifdef DEBUG_PUSH
12245 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12246#endif
12247
Owen Taylor3473f882001-02-23 17:55:21 +000012248 } else if (ctxt->instate != XML_PARSER_EOF) {
12249 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12250 xmlParserInputBufferPtr in = ctxt->input->buf;
12251 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12252 (in->raw != NULL)) {
12253 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012254 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12255 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012256
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012257 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012258 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012259 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012260 xmlGenericError(xmlGenericErrorContext,
12261 "xmlParseChunk: encoder error\n");
12262 return(XML_ERR_INVALID_ENCODING);
12263 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012264 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012265 }
12266 }
12267 }
Daniel Veillard65686452012-07-19 18:25:01 +080012268 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012269 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012270 } else {
12271 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12272 avail = xmlBufUse(ctxt->input->buf->buffer);
12273 /*
12274 * Depending on the current state it may not be such
12275 * a good idea to try parsing if there is nothing in the chunk
12276 * which would be worth doing a parser state transition and we
12277 * need to wait for more data
12278 */
12279 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12280 (old_avail == 0) || (avail == 0) ||
12281 (xmlParseCheckTransition(ctxt,
12282 (const char *)&ctxt->input->base[old_avail],
12283 avail - old_avail)))
12284 xmlParseTryOrFinish(ctxt, terminate);
12285 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012286 if (ctxt->instate == XML_PARSER_EOF)
12287 return(ctxt->errNo);
12288
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012289 if ((ctxt->input != NULL) &&
12290 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12291 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12292 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12293 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12294 ctxt->instate = XML_PARSER_EOF;
12295 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012296 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12297 return(ctxt->errNo);
12298
12299 if (remain != 0) {
12300 chunk += size;
12301 size = remain;
12302 remain = 0;
12303 goto xmldecl_done;
12304 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012305 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12306 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012307 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12308 ctxt->input);
12309 size_t current = ctxt->input->cur - ctxt->input->base;
12310
Daniel Veillarda617e242006-01-09 14:38:44 +000012311 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012312
12313 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12314 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012315 }
Owen Taylor3473f882001-02-23 17:55:21 +000012316 if (terminate) {
12317 /*
12318 * Check for termination
12319 */
Daniel Veillard65686452012-07-19 18:25:01 +080012320 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012321
12322 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012323 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012324 cur_avail = ctxt->input->length -
12325 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012326 else
Daniel Veillard65686452012-07-19 18:25:01 +080012327 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12328 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012329 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012330
Owen Taylor3473f882001-02-23 17:55:21 +000012331 if ((ctxt->instate != XML_PARSER_EOF) &&
12332 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012333 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012334 }
Daniel Veillard65686452012-07-19 18:25:01 +080012335 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012336 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012337 }
Owen Taylor3473f882001-02-23 17:55:21 +000012338 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012339 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012340 ctxt->sax->endDocument(ctxt->userData);
12341 }
12342 ctxt->instate = XML_PARSER_EOF;
12343 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012344 if (ctxt->wellFormed == 0)
12345 return((xmlParserErrors) ctxt->errNo);
12346 else
12347 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012348}
12349
12350/************************************************************************
12351 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012352 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012353 * *
12354 ************************************************************************/
12355
12356/**
Owen Taylor3473f882001-02-23 17:55:21 +000012357 * xmlCreatePushParserCtxt:
12358 * @sax: a SAX handler
12359 * @user_data: The user data returned on SAX callbacks
12360 * @chunk: a pointer to an array of chars
12361 * @size: number of chars in the array
12362 * @filename: an optional file name or URI
12363 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012364 * Create a parser context for using the XML parser in push mode.
12365 * If @buffer and @size are non-NULL, the data is used to detect
12366 * the encoding. The remaining characters will be parsed so they
12367 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012368 * To allow content encoding detection, @size should be >= 4
12369 * The value of @filename is used for fetching external entities
12370 * and error/warning reports.
12371 *
12372 * Returns the new parser context or NULL
12373 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012374
Owen Taylor3473f882001-02-23 17:55:21 +000012375xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012376xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012377 const char *chunk, int size, const char *filename) {
12378 xmlParserCtxtPtr ctxt;
12379 xmlParserInputPtr inputStream;
12380 xmlParserInputBufferPtr buf;
12381 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12382
12383 /*
12384 * plug some encoding conversion routines
12385 */
12386 if ((chunk != NULL) && (size >= 4))
12387 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12388
12389 buf = xmlAllocParserInputBuffer(enc);
12390 if (buf == NULL) return(NULL);
12391
12392 ctxt = xmlNewParserCtxt();
12393 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012394 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012395 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012396 return(NULL);
12397 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012398 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012399 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12400 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012401 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012402 xmlFreeParserInputBuffer(buf);
12403 xmlFreeParserCtxt(ctxt);
12404 return(NULL);
12405 }
Owen Taylor3473f882001-02-23 17:55:21 +000012406 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012407#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012408 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012409#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012410 xmlFree(ctxt->sax);
12411 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12412 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012413 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012414 xmlFreeParserInputBuffer(buf);
12415 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012416 return(NULL);
12417 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012418 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12419 if (sax->initialized == XML_SAX2_MAGIC)
12420 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12421 else
12422 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012423 if (user_data != NULL)
12424 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012425 }
Owen Taylor3473f882001-02-23 17:55:21 +000012426 if (filename == NULL) {
12427 ctxt->directory = NULL;
12428 } else {
12429 ctxt->directory = xmlParserGetDirectory(filename);
12430 }
12431
12432 inputStream = xmlNewInputStream(ctxt);
12433 if (inputStream == NULL) {
12434 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012435 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012436 return(NULL);
12437 }
12438
12439 if (filename == NULL)
12440 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012441 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012442 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012443 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012444 if (inputStream->filename == NULL) {
12445 xmlFreeParserCtxt(ctxt);
12446 xmlFreeParserInputBuffer(buf);
12447 return(NULL);
12448 }
12449 }
Owen Taylor3473f882001-02-23 17:55:21 +000012450 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012451 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012452 inputPush(ctxt, inputStream);
12453
William M. Brack3a1cd212005-02-11 14:35:54 +000012454 /*
12455 * If the caller didn't provide an initial 'chunk' for determining
12456 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12457 * that it can be automatically determined later
12458 */
12459 if ((size == 0) || (chunk == NULL)) {
12460 ctxt->charset = XML_CHAR_ENCODING_NONE;
12461 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012462 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12463 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012464
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012465 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012466
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012467 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012468#ifdef DEBUG_PUSH
12469 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12470#endif
12471 }
12472
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012473 if (enc != XML_CHAR_ENCODING_NONE) {
12474 xmlSwitchEncoding(ctxt, enc);
12475 }
12476
Owen Taylor3473f882001-02-23 17:55:21 +000012477 return(ctxt);
12478}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012479#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012480
12481/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012482 * xmlStopParser:
12483 * @ctxt: an XML parser context
12484 *
12485 * Blocks further parser processing
12486 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012487void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012488xmlStopParser(xmlParserCtxtPtr ctxt) {
12489 if (ctxt == NULL)
12490 return;
12491 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarde50ba812013-04-11 15:54:51 +080012492 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012493 ctxt->disableSAX = 1;
12494 if (ctxt->input != NULL) {
12495 ctxt->input->cur = BAD_CAST"";
12496 ctxt->input->base = ctxt->input->cur;
12497 }
12498}
12499
12500/**
Owen Taylor3473f882001-02-23 17:55:21 +000012501 * xmlCreateIOParserCtxt:
12502 * @sax: a SAX handler
12503 * @user_data: The user data returned on SAX callbacks
12504 * @ioread: an I/O read function
12505 * @ioclose: an I/O close function
12506 * @ioctx: an I/O handler
12507 * @enc: the charset encoding if known
12508 *
12509 * Create a parser context for using the XML parser with an existing
12510 * I/O stream
12511 *
12512 * Returns the new parser context or NULL
12513 */
12514xmlParserCtxtPtr
12515xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12516 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12517 void *ioctx, xmlCharEncoding enc) {
12518 xmlParserCtxtPtr ctxt;
12519 xmlParserInputPtr inputStream;
12520 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012521
Daniel Veillard42595322004-11-08 10:52:06 +000012522 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012523
12524 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012525 if (buf == NULL) {
12526 if (ioclose != NULL)
12527 ioclose(ioctx);
12528 return (NULL);
12529 }
Owen Taylor3473f882001-02-23 17:55:21 +000012530
12531 ctxt = xmlNewParserCtxt();
12532 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012533 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012534 return(NULL);
12535 }
12536 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012537#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012538 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012539#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012540 xmlFree(ctxt->sax);
12541 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12542 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012543 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012544 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012545 return(NULL);
12546 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012547 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12548 if (sax->initialized == XML_SAX2_MAGIC)
12549 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12550 else
12551 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012552 if (user_data != NULL)
12553 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012554 }
Owen Taylor3473f882001-02-23 17:55:21 +000012555
12556 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12557 if (inputStream == NULL) {
12558 xmlFreeParserCtxt(ctxt);
12559 return(NULL);
12560 }
12561 inputPush(ctxt, inputStream);
12562
12563 return(ctxt);
12564}
12565
Daniel Veillard4432df22003-09-28 18:58:27 +000012566#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012567/************************************************************************
12568 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012569 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012570 * *
12571 ************************************************************************/
12572
12573/**
12574 * xmlIOParseDTD:
12575 * @sax: the SAX handler block or NULL
12576 * @input: an Input Buffer
12577 * @enc: the charset encoding if known
12578 *
12579 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012580 *
Owen Taylor3473f882001-02-23 17:55:21 +000012581 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012582 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012583 */
12584
12585xmlDtdPtr
12586xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12587 xmlCharEncoding enc) {
12588 xmlDtdPtr ret = NULL;
12589 xmlParserCtxtPtr ctxt;
12590 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012591 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012592
12593 if (input == NULL)
12594 return(NULL);
12595
12596 ctxt = xmlNewParserCtxt();
12597 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012598 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012599 return(NULL);
12600 }
12601
12602 /*
12603 * Set-up the SAX context
12604 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012605 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012606 if (ctxt->sax != NULL)
12607 xmlFree(ctxt->sax);
12608 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012609 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012610 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012611 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012612
12613 /*
12614 * generate a parser input from the I/O handler
12615 */
12616
Daniel Veillard43caefb2003-12-07 19:32:22 +000012617 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012618 if (pinput == NULL) {
12619 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012620 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012621 xmlFreeParserCtxt(ctxt);
12622 return(NULL);
12623 }
12624
12625 /*
12626 * plug some encoding conversion routines here.
12627 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012628 if (xmlPushInput(ctxt, pinput) < 0) {
12629 if (sax != NULL) ctxt->sax = NULL;
12630 xmlFreeParserCtxt(ctxt);
12631 return(NULL);
12632 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012633 if (enc != XML_CHAR_ENCODING_NONE) {
12634 xmlSwitchEncoding(ctxt, enc);
12635 }
Owen Taylor3473f882001-02-23 17:55:21 +000012636
12637 pinput->filename = NULL;
12638 pinput->line = 1;
12639 pinput->col = 1;
12640 pinput->base = ctxt->input->cur;
12641 pinput->cur = ctxt->input->cur;
12642 pinput->free = NULL;
12643
12644 /*
12645 * let's parse that entity knowing it's an external subset.
12646 */
12647 ctxt->inSubset = 2;
12648 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012649 if (ctxt->myDoc == NULL) {
12650 xmlErrMemory(ctxt, "New Doc failed");
12651 return(NULL);
12652 }
12653 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012654 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12655 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012656
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012657 if ((enc == XML_CHAR_ENCODING_NONE) &&
12658 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012659 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012660 * Get the 4 first bytes and decode the charset
12661 * if enc != XML_CHAR_ENCODING_NONE
12662 * plug some encoding conversion routines.
12663 */
12664 start[0] = RAW;
12665 start[1] = NXT(1);
12666 start[2] = NXT(2);
12667 start[3] = NXT(3);
12668 enc = xmlDetectCharEncoding(start, 4);
12669 if (enc != XML_CHAR_ENCODING_NONE) {
12670 xmlSwitchEncoding(ctxt, enc);
12671 }
12672 }
12673
Owen Taylor3473f882001-02-23 17:55:21 +000012674 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12675
12676 if (ctxt->myDoc != NULL) {
12677 if (ctxt->wellFormed) {
12678 ret = ctxt->myDoc->extSubset;
12679 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012680 if (ret != NULL) {
12681 xmlNodePtr tmp;
12682
12683 ret->doc = NULL;
12684 tmp = ret->children;
12685 while (tmp != NULL) {
12686 tmp->doc = NULL;
12687 tmp = tmp->next;
12688 }
12689 }
Owen Taylor3473f882001-02-23 17:55:21 +000012690 } else {
12691 ret = NULL;
12692 }
12693 xmlFreeDoc(ctxt->myDoc);
12694 ctxt->myDoc = NULL;
12695 }
12696 if (sax != NULL) ctxt->sax = NULL;
12697 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012698
Owen Taylor3473f882001-02-23 17:55:21 +000012699 return(ret);
12700}
12701
12702/**
12703 * xmlSAXParseDTD:
12704 * @sax: the SAX handler block
12705 * @ExternalID: a NAME* containing the External ID of the DTD
12706 * @SystemID: a NAME* containing the URL to the DTD
12707 *
12708 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012709 *
Owen Taylor3473f882001-02-23 17:55:21 +000012710 * Returns the resulting xmlDtdPtr or NULL in case of error.
12711 */
12712
12713xmlDtdPtr
12714xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12715 const xmlChar *SystemID) {
12716 xmlDtdPtr ret = NULL;
12717 xmlParserCtxtPtr ctxt;
12718 xmlParserInputPtr input = NULL;
12719 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012720 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012721
12722 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12723
12724 ctxt = xmlNewParserCtxt();
12725 if (ctxt == NULL) {
12726 return(NULL);
12727 }
12728
12729 /*
12730 * Set-up the SAX context
12731 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012732 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012733 if (ctxt->sax != NULL)
12734 xmlFree(ctxt->sax);
12735 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012736 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012737 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012738
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012739 /*
12740 * Canonicalise the system ID
12741 */
12742 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012743 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012744 xmlFreeParserCtxt(ctxt);
12745 return(NULL);
12746 }
Owen Taylor3473f882001-02-23 17:55:21 +000012747
12748 /*
12749 * Ask the Entity resolver to load the damn thing
12750 */
12751
12752 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012753 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12754 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012755 if (input == NULL) {
12756 if (sax != NULL) ctxt->sax = NULL;
12757 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012758 if (systemIdCanonic != NULL)
12759 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012760 return(NULL);
12761 }
12762
12763 /*
12764 * plug some encoding conversion routines here.
12765 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012766 if (xmlPushInput(ctxt, input) < 0) {
12767 if (sax != NULL) ctxt->sax = NULL;
12768 xmlFreeParserCtxt(ctxt);
12769 if (systemIdCanonic != NULL)
12770 xmlFree(systemIdCanonic);
12771 return(NULL);
12772 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012773 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12774 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12775 xmlSwitchEncoding(ctxt, enc);
12776 }
Owen Taylor3473f882001-02-23 17:55:21 +000012777
12778 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012779 input->filename = (char *) systemIdCanonic;
12780 else
12781 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012782 input->line = 1;
12783 input->col = 1;
12784 input->base = ctxt->input->cur;
12785 input->cur = ctxt->input->cur;
12786 input->free = NULL;
12787
12788 /*
12789 * let's parse that entity knowing it's an external subset.
12790 */
12791 ctxt->inSubset = 2;
12792 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012793 if (ctxt->myDoc == NULL) {
12794 xmlErrMemory(ctxt, "New Doc failed");
12795 if (sax != NULL) ctxt->sax = NULL;
12796 xmlFreeParserCtxt(ctxt);
12797 return(NULL);
12798 }
12799 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012800 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12801 ExternalID, SystemID);
12802 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12803
12804 if (ctxt->myDoc != NULL) {
12805 if (ctxt->wellFormed) {
12806 ret = ctxt->myDoc->extSubset;
12807 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012808 if (ret != NULL) {
12809 xmlNodePtr tmp;
12810
12811 ret->doc = NULL;
12812 tmp = ret->children;
12813 while (tmp != NULL) {
12814 tmp->doc = NULL;
12815 tmp = tmp->next;
12816 }
12817 }
Owen Taylor3473f882001-02-23 17:55:21 +000012818 } else {
12819 ret = NULL;
12820 }
12821 xmlFreeDoc(ctxt->myDoc);
12822 ctxt->myDoc = NULL;
12823 }
12824 if (sax != NULL) ctxt->sax = NULL;
12825 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012826
Owen Taylor3473f882001-02-23 17:55:21 +000012827 return(ret);
12828}
12829
Daniel Veillard4432df22003-09-28 18:58:27 +000012830
Owen Taylor3473f882001-02-23 17:55:21 +000012831/**
12832 * xmlParseDTD:
12833 * @ExternalID: a NAME* containing the External ID of the DTD
12834 * @SystemID: a NAME* containing the URL to the DTD
12835 *
12836 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012837 *
Owen Taylor3473f882001-02-23 17:55:21 +000012838 * Returns the resulting xmlDtdPtr or NULL in case of error.
12839 */
12840
12841xmlDtdPtr
12842xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12843 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12844}
Daniel Veillard4432df22003-09-28 18:58:27 +000012845#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012846
12847/************************************************************************
12848 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012849 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012850 * *
12851 ************************************************************************/
12852
12853/**
Owen Taylor3473f882001-02-23 17:55:21 +000012854 * xmlParseCtxtExternalEntity:
12855 * @ctx: the existing parsing context
12856 * @URL: the URL for the entity to load
12857 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012858 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012859 *
12860 * Parse an external general entity within an existing parsing context
12861 * An external general parsed entity is well-formed if it matches the
12862 * production labeled extParsedEnt.
12863 *
12864 * [78] extParsedEnt ::= TextDecl? content
12865 *
12866 * Returns 0 if the entity is well formed, -1 in case of args problem and
12867 * the parser error code otherwise
12868 */
12869
12870int
12871xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012872 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012873 xmlParserCtxtPtr ctxt;
12874 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012875 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012876 xmlSAXHandlerPtr oldsax = NULL;
12877 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012878 xmlChar start[4];
12879 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012880
Daniel Veillardce682bc2004-11-05 17:22:25 +000012881 if (ctx == NULL) return(-1);
12882
Daniel Veillard0161e632008-08-28 15:36:32 +000012883 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12884 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012885 return(XML_ERR_ENTITY_LOOP);
12886 }
12887
Daniel Veillardcda96922001-08-21 10:56:31 +000012888 if (lst != NULL)
12889 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012890 if ((URL == NULL) && (ID == NULL))
12891 return(-1);
12892 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12893 return(-1);
12894
Rob Richards798743a2009-06-19 13:54:25 -040012895 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012896 if (ctxt == NULL) {
12897 return(-1);
12898 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012899
Owen Taylor3473f882001-02-23 17:55:21 +000012900 oldsax = ctxt->sax;
12901 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012902 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012903 newDoc = xmlNewDoc(BAD_CAST "1.0");
12904 if (newDoc == NULL) {
12905 xmlFreeParserCtxt(ctxt);
12906 return(-1);
12907 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012908 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012909 if (ctx->myDoc->dict) {
12910 newDoc->dict = ctx->myDoc->dict;
12911 xmlDictReference(newDoc->dict);
12912 }
Owen Taylor3473f882001-02-23 17:55:21 +000012913 if (ctx->myDoc != NULL) {
12914 newDoc->intSubset = ctx->myDoc->intSubset;
12915 newDoc->extSubset = ctx->myDoc->extSubset;
12916 }
12917 if (ctx->myDoc->URL != NULL) {
12918 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12919 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012920 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12921 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012922 ctxt->sax = oldsax;
12923 xmlFreeParserCtxt(ctxt);
12924 newDoc->intSubset = NULL;
12925 newDoc->extSubset = NULL;
12926 xmlFreeDoc(newDoc);
12927 return(-1);
12928 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012929 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012930 nodePush(ctxt, newDoc->children);
12931 if (ctx->myDoc == NULL) {
12932 ctxt->myDoc = newDoc;
12933 } else {
12934 ctxt->myDoc = ctx->myDoc;
12935 newDoc->children->doc = ctx->myDoc;
12936 }
12937
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012938 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012939 * Get the 4 first bytes and decode the charset
12940 * if enc != XML_CHAR_ENCODING_NONE
12941 * plug some encoding conversion routines.
12942 */
12943 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012944 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12945 start[0] = RAW;
12946 start[1] = NXT(1);
12947 start[2] = NXT(2);
12948 start[3] = NXT(3);
12949 enc = xmlDetectCharEncoding(start, 4);
12950 if (enc != XML_CHAR_ENCODING_NONE) {
12951 xmlSwitchEncoding(ctxt, enc);
12952 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012953 }
12954
Owen Taylor3473f882001-02-23 17:55:21 +000012955 /*
12956 * Parse a possible text declaration first
12957 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012958 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012959 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012960 /*
12961 * An XML-1.0 document can't reference an entity not XML-1.0
12962 */
12963 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12964 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012965 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012966 "Version mismatch between document and entity\n");
12967 }
Owen Taylor3473f882001-02-23 17:55:21 +000012968 }
12969
12970 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012971 * If the user provided its own SAX callbacks then reuse the
12972 * useData callback field, otherwise the expected setup in a
12973 * DOM builder is to have userData == ctxt
12974 */
12975 if (ctx->userData == ctx)
12976 ctxt->userData = ctxt;
12977 else
12978 ctxt->userData = ctx->userData;
12979
12980 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012981 * Doing validity checking on chunk doesn't make sense
12982 */
12983 ctxt->instate = XML_PARSER_CONTENT;
12984 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012985 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012986 ctxt->loadsubset = ctx->loadsubset;
12987 ctxt->depth = ctx->depth + 1;
12988 ctxt->replaceEntities = ctx->replaceEntities;
12989 if (ctxt->validate) {
12990 ctxt->vctxt.error = ctx->vctxt.error;
12991 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012992 } else {
12993 ctxt->vctxt.error = NULL;
12994 ctxt->vctxt.warning = NULL;
12995 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012996 ctxt->vctxt.nodeTab = NULL;
12997 ctxt->vctxt.nodeNr = 0;
12998 ctxt->vctxt.nodeMax = 0;
12999 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013000 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13001 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013002 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13003 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13004 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013005 ctxt->dictNames = ctx->dictNames;
13006 ctxt->attsDefault = ctx->attsDefault;
13007 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013008 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013009
13010 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013011
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013012 ctx->validate = ctxt->validate;
13013 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013014 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013015 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013016 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013017 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013018 }
13019 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013020 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013021 }
13022
13023 if (!ctxt->wellFormed) {
13024 if (ctxt->errNo == 0)
13025 ret = 1;
13026 else
13027 ret = ctxt->errNo;
13028 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013029 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013030 xmlNodePtr cur;
13031
13032 /*
13033 * Return the newly created nodeset after unlinking it from
13034 * they pseudo parent.
13035 */
13036 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013037 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013038 while (cur != NULL) {
13039 cur->parent = NULL;
13040 cur = cur->next;
13041 }
13042 newDoc->children->children = NULL;
13043 }
13044 ret = 0;
13045 }
13046 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013047 ctxt->dict = NULL;
13048 ctxt->attsDefault = NULL;
13049 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013050 xmlFreeParserCtxt(ctxt);
13051 newDoc->intSubset = NULL;
13052 newDoc->extSubset = NULL;
13053 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013054
Owen Taylor3473f882001-02-23 17:55:21 +000013055 return(ret);
13056}
13057
13058/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013059 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013060 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013061 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013062 * @sax: the SAX handler bloc (possibly NULL)
13063 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13064 * @depth: Used for loop detection, use 0
13065 * @URL: the URL for the entity to load
13066 * @ID: the System ID for the entity to load
13067 * @list: the return value for the set of parsed nodes
13068 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013069 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013070 *
13071 * Returns 0 if the entity is well formed, -1 in case of args problem and
13072 * the parser error code otherwise
13073 */
13074
Daniel Veillard7d515752003-09-26 19:12:37 +000013075static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013076xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13077 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013078 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013079 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013080 xmlParserCtxtPtr ctxt;
13081 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013082 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013083 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013084 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013085 xmlChar start[4];
13086 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013087
Daniel Veillard0161e632008-08-28 15:36:32 +000013088 if (((depth > 40) &&
13089 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13090 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013091 return(XML_ERR_ENTITY_LOOP);
13092 }
13093
Owen Taylor3473f882001-02-23 17:55:21 +000013094 if (list != NULL)
13095 *list = NULL;
13096 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013097 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013098 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013099 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013100
13101
Rob Richards9c0aa472009-03-26 18:10:19 +000013102 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013103 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013104 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013105 if (oldctxt != NULL) {
13106 ctxt->_private = oldctxt->_private;
13107 ctxt->loadsubset = oldctxt->loadsubset;
13108 ctxt->validate = oldctxt->validate;
13109 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013110 ctxt->record_info = oldctxt->record_info;
13111 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13112 ctxt->node_seq.length = oldctxt->node_seq.length;
13113 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013114 } else {
13115 /*
13116 * Doing validity checking on chunk without context
13117 * doesn't make sense
13118 */
13119 ctxt->_private = NULL;
13120 ctxt->validate = 0;
13121 ctxt->external = 2;
13122 ctxt->loadsubset = 0;
13123 }
Owen Taylor3473f882001-02-23 17:55:21 +000013124 if (sax != NULL) {
13125 oldsax = ctxt->sax;
13126 ctxt->sax = sax;
13127 if (user_data != NULL)
13128 ctxt->userData = user_data;
13129 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013130 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013131 newDoc = xmlNewDoc(BAD_CAST "1.0");
13132 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013133 ctxt->node_seq.maximum = 0;
13134 ctxt->node_seq.length = 0;
13135 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013136 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013137 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013138 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013139 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013140 newDoc->intSubset = doc->intSubset;
13141 newDoc->extSubset = doc->extSubset;
13142 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013143 xmlDictReference(newDoc->dict);
13144
Owen Taylor3473f882001-02-23 17:55:21 +000013145 if (doc->URL != NULL) {
13146 newDoc->URL = xmlStrdup(doc->URL);
13147 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013148 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13149 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013150 if (sax != NULL)
13151 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013152 ctxt->node_seq.maximum = 0;
13153 ctxt->node_seq.length = 0;
13154 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013155 xmlFreeParserCtxt(ctxt);
13156 newDoc->intSubset = NULL;
13157 newDoc->extSubset = NULL;
13158 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013159 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013160 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013161 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013162 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013163 ctxt->myDoc = doc;
13164 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013165
Daniel Veillard0161e632008-08-28 15:36:32 +000013166 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013167 * Get the 4 first bytes and decode the charset
13168 * if enc != XML_CHAR_ENCODING_NONE
13169 * plug some encoding conversion routines.
13170 */
13171 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013172 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13173 start[0] = RAW;
13174 start[1] = NXT(1);
13175 start[2] = NXT(2);
13176 start[3] = NXT(3);
13177 enc = xmlDetectCharEncoding(start, 4);
13178 if (enc != XML_CHAR_ENCODING_NONE) {
13179 xmlSwitchEncoding(ctxt, enc);
13180 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013181 }
13182
Owen Taylor3473f882001-02-23 17:55:21 +000013183 /*
13184 * Parse a possible text declaration first
13185 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013186 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013187 xmlParseTextDecl(ctxt);
13188 }
13189
Owen Taylor3473f882001-02-23 17:55:21 +000013190 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013191 ctxt->depth = depth;
13192
13193 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013194
Daniel Veillard561b7f82002-03-20 21:55:57 +000013195 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013196 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013197 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013198 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013199 }
13200 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013201 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013202 }
13203
13204 if (!ctxt->wellFormed) {
13205 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013206 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013207 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013208 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013209 } else {
13210 if (list != NULL) {
13211 xmlNodePtr cur;
13212
13213 /*
13214 * Return the newly created nodeset after unlinking it from
13215 * they pseudo parent.
13216 */
13217 cur = newDoc->children->children;
13218 *list = cur;
13219 while (cur != NULL) {
13220 cur->parent = NULL;
13221 cur = cur->next;
13222 }
13223 newDoc->children->children = NULL;
13224 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013225 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013226 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013227
13228 /*
13229 * Record in the parent context the number of entities replacement
13230 * done when parsing that reference.
13231 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013232 if (oldctxt != NULL)
13233 oldctxt->nbentities += ctxt->nbentities;
13234
Daniel Veillard0161e632008-08-28 15:36:32 +000013235 /*
13236 * Also record the size of the entity parsed
13237 */
13238 if (ctxt->input != NULL) {
13239 oldctxt->sizeentities += ctxt->input->consumed;
13240 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13241 }
13242 /*
13243 * And record the last error if any
13244 */
13245 if (ctxt->lastError.code != XML_ERR_OK)
13246 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13247
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013248 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013249 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013250 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13251 oldctxt->node_seq.length = ctxt->node_seq.length;
13252 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013253 ctxt->node_seq.maximum = 0;
13254 ctxt->node_seq.length = 0;
13255 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013256 xmlFreeParserCtxt(ctxt);
13257 newDoc->intSubset = NULL;
13258 newDoc->extSubset = NULL;
13259 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013260
Owen Taylor3473f882001-02-23 17:55:21 +000013261 return(ret);
13262}
13263
Daniel Veillard81273902003-09-30 00:43:48 +000013264#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013265/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013266 * xmlParseExternalEntity:
13267 * @doc: the document the chunk pertains to
13268 * @sax: the SAX handler bloc (possibly NULL)
13269 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13270 * @depth: Used for loop detection, use 0
13271 * @URL: the URL for the entity to load
13272 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013273 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013274 *
13275 * Parse an external general entity
13276 * An external general parsed entity is well-formed if it matches the
13277 * production labeled extParsedEnt.
13278 *
13279 * [78] extParsedEnt ::= TextDecl? content
13280 *
13281 * Returns 0 if the entity is well formed, -1 in case of args problem and
13282 * the parser error code otherwise
13283 */
13284
13285int
13286xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013287 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013288 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013289 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013290}
13291
13292/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013293 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013294 * @doc: the document the chunk pertains to
13295 * @sax: the SAX handler bloc (possibly NULL)
13296 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13297 * @depth: Used for loop detection, use 0
13298 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013299 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013300 *
13301 * Parse a well-balanced chunk of an XML document
13302 * called by the parser
13303 * The allowed sequence for the Well Balanced Chunk is the one defined by
13304 * the content production in the XML grammar:
13305 *
13306 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13307 *
13308 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13309 * the parser error code otherwise
13310 */
13311
13312int
13313xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013314 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013315 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13316 depth, string, lst, 0 );
13317}
Daniel Veillard81273902003-09-30 00:43:48 +000013318#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013319
13320/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013321 * xmlParseBalancedChunkMemoryInternal:
13322 * @oldctxt: the existing parsing context
13323 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13324 * @user_data: the user data field for the parser context
13325 * @lst: the return value for the set of parsed nodes
13326 *
13327 *
13328 * Parse a well-balanced chunk of an XML document
13329 * called by the parser
13330 * The allowed sequence for the Well Balanced Chunk is the one defined by
13331 * the content production in the XML grammar:
13332 *
13333 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13334 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013335 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13336 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013337 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013338 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013339 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013340 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013341static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013342xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13343 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13344 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013345 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013346 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013347 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013348 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013349 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013350 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013351 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013352#ifdef SAX2
13353 int i;
13354#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013355
Daniel Veillard0161e632008-08-28 15:36:32 +000013356 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13357 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013358 return(XML_ERR_ENTITY_LOOP);
13359 }
13360
13361
13362 if (lst != NULL)
13363 *lst = NULL;
13364 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013365 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013366
13367 size = xmlStrlen(string);
13368
13369 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013370 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013371 if (user_data != NULL)
13372 ctxt->userData = user_data;
13373 else
13374 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013375 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13376 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013377 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13378 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13379 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013380
Daniel Veillard74eaec12009-08-26 15:57:20 +020013381#ifdef SAX2
13382 /* propagate namespaces down the entity */
13383 for (i = 0;i < oldctxt->nsNr;i += 2) {
13384 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13385 }
13386#endif
13387
Daniel Veillard328f48c2002-11-15 15:24:34 +000013388 oldsax = ctxt->sax;
13389 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013390 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013391 ctxt->replaceEntities = oldctxt->replaceEntities;
13392 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013393
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013394 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013395 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013396 newDoc = xmlNewDoc(BAD_CAST "1.0");
13397 if (newDoc == NULL) {
13398 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013399 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013400 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013401 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013402 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013403 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013404 newDoc->dict = ctxt->dict;
13405 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013406 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013407 } else {
13408 ctxt->myDoc = oldctxt->myDoc;
13409 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013410 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013411 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013412 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13413 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013414 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013415 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013416 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013417 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013418 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013419 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013420 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013421 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013422 ctxt->myDoc->children = NULL;
13423 ctxt->myDoc->last = NULL;
13424 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013425 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013426 ctxt->instate = XML_PARSER_CONTENT;
13427 ctxt->depth = oldctxt->depth + 1;
13428
Daniel Veillard328f48c2002-11-15 15:24:34 +000013429 ctxt->validate = 0;
13430 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013431 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13432 /*
13433 * ID/IDREF registration will be done in xmlValidateElement below
13434 */
13435 ctxt->loadsubset |= XML_SKIP_IDS;
13436 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013437 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013438 ctxt->attsDefault = oldctxt->attsDefault;
13439 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013440
Daniel Veillard68e9e742002-11-16 15:35:11 +000013441 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013442 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013443 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013444 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013445 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013446 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013447 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013448 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013449 }
13450
13451 if (!ctxt->wellFormed) {
13452 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013453 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013454 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013455 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013456 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013457 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013458 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013459
William M. Brack7b9154b2003-09-27 19:23:50 +000013460 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013461 xmlNodePtr cur;
13462
13463 /*
13464 * Return the newly created nodeset after unlinking it from
13465 * they pseudo parent.
13466 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013467 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013468 *lst = cur;
13469 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013470#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013471 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13472 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13473 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013474 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13475 oldctxt->myDoc, cur);
13476 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013477#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013478 cur->parent = NULL;
13479 cur = cur->next;
13480 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013481 ctxt->myDoc->children->children = NULL;
13482 }
13483 if (ctxt->myDoc != NULL) {
13484 xmlFreeNode(ctxt->myDoc->children);
13485 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013486 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013487 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013488
13489 /*
13490 * Record in the parent context the number of entities replacement
13491 * done when parsing that reference.
13492 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013493 if (oldctxt != NULL)
13494 oldctxt->nbentities += ctxt->nbentities;
13495
Daniel Veillard0161e632008-08-28 15:36:32 +000013496 /*
13497 * Also record the last error if any
13498 */
13499 if (ctxt->lastError.code != XML_ERR_OK)
13500 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13501
Daniel Veillard328f48c2002-11-15 15:24:34 +000013502 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013503 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013504 ctxt->attsDefault = NULL;
13505 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013506 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013507 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013508 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013509 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013510
Daniel Veillard328f48c2002-11-15 15:24:34 +000013511 return(ret);
13512}
13513
Daniel Veillard29b17482004-08-16 00:39:03 +000013514/**
13515 * xmlParseInNodeContext:
13516 * @node: the context node
13517 * @data: the input string
13518 * @datalen: the input string length in bytes
13519 * @options: a combination of xmlParserOption
13520 * @lst: the return value for the set of parsed nodes
13521 *
13522 * Parse a well-balanced chunk of an XML document
13523 * within the context (DTD, namespaces, etc ...) of the given node.
13524 *
13525 * The allowed sequence for the data is a Well Balanced Chunk defined by
13526 * the content production in the XML grammar:
13527 *
13528 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13529 *
13530 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13531 * error code otherwise
13532 */
13533xmlParserErrors
13534xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13535 int options, xmlNodePtr *lst) {
13536#ifdef SAX2
13537 xmlParserCtxtPtr ctxt;
13538 xmlDocPtr doc = NULL;
13539 xmlNodePtr fake, cur;
13540 int nsnr = 0;
13541
13542 xmlParserErrors ret = XML_ERR_OK;
13543
13544 /*
13545 * check all input parameters, grab the document
13546 */
13547 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13548 return(XML_ERR_INTERNAL_ERROR);
13549 switch (node->type) {
13550 case XML_ELEMENT_NODE:
13551 case XML_ATTRIBUTE_NODE:
13552 case XML_TEXT_NODE:
13553 case XML_CDATA_SECTION_NODE:
13554 case XML_ENTITY_REF_NODE:
13555 case XML_PI_NODE:
13556 case XML_COMMENT_NODE:
13557 case XML_DOCUMENT_NODE:
13558 case XML_HTML_DOCUMENT_NODE:
13559 break;
13560 default:
13561 return(XML_ERR_INTERNAL_ERROR);
13562
13563 }
13564 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13565 (node->type != XML_DOCUMENT_NODE) &&
13566 (node->type != XML_HTML_DOCUMENT_NODE))
13567 node = node->parent;
13568 if (node == NULL)
13569 return(XML_ERR_INTERNAL_ERROR);
13570 if (node->type == XML_ELEMENT_NODE)
13571 doc = node->doc;
13572 else
13573 doc = (xmlDocPtr) node;
13574 if (doc == NULL)
13575 return(XML_ERR_INTERNAL_ERROR);
13576
13577 /*
13578 * allocate a context and set-up everything not related to the
13579 * node position in the tree
13580 */
13581 if (doc->type == XML_DOCUMENT_NODE)
13582 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13583#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013584 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013585 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013586 /*
13587 * When parsing in context, it makes no sense to add implied
13588 * elements like html/body/etc...
13589 */
13590 options |= HTML_PARSE_NOIMPLIED;
13591 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013592#endif
13593 else
13594 return(XML_ERR_INTERNAL_ERROR);
13595
13596 if (ctxt == NULL)
13597 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013598
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013599 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013600 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13601 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13602 * we must wait until the last moment to free the original one.
13603 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013604 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013605 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013606 xmlDictFree(ctxt->dict);
13607 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013608 } else
13609 options |= XML_PARSE_NODICT;
13610
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013611 if (doc->encoding != NULL) {
13612 xmlCharEncodingHandlerPtr hdlr;
13613
13614 if (ctxt->encoding != NULL)
13615 xmlFree((xmlChar *) ctxt->encoding);
13616 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13617
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013618 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013619 if (hdlr != NULL) {
13620 xmlSwitchToEncoding(ctxt, hdlr);
13621 } else {
13622 return(XML_ERR_UNSUPPORTED_ENCODING);
13623 }
13624 }
13625
Daniel Veillard37334572008-07-31 08:20:02 +000013626 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013627 xmlDetectSAX2(ctxt);
13628 ctxt->myDoc = doc;
13629
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013630 fake = xmlNewComment(NULL);
13631 if (fake == NULL) {
13632 xmlFreeParserCtxt(ctxt);
13633 return(XML_ERR_NO_MEMORY);
13634 }
13635 xmlAddChild(node, fake);
13636
Daniel Veillard29b17482004-08-16 00:39:03 +000013637 if (node->type == XML_ELEMENT_NODE) {
13638 nodePush(ctxt, node);
13639 /*
13640 * initialize the SAX2 namespaces stack
13641 */
13642 cur = node;
13643 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13644 xmlNsPtr ns = cur->nsDef;
13645 const xmlChar *iprefix, *ihref;
13646
13647 while (ns != NULL) {
13648 if (ctxt->dict) {
13649 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13650 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13651 } else {
13652 iprefix = ns->prefix;
13653 ihref = ns->href;
13654 }
13655
13656 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13657 nsPush(ctxt, iprefix, ihref);
13658 nsnr++;
13659 }
13660 ns = ns->next;
13661 }
13662 cur = cur->parent;
13663 }
13664 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013665 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013666
13667 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13668 /*
13669 * ID/IDREF registration will be done in xmlValidateElement below
13670 */
13671 ctxt->loadsubset |= XML_SKIP_IDS;
13672 }
13673
Daniel Veillard499cc922006-01-18 17:22:35 +000013674#ifdef LIBXML_HTML_ENABLED
13675 if (doc->type == XML_HTML_DOCUMENT_NODE)
13676 __htmlParseContent(ctxt);
13677 else
13678#endif
13679 xmlParseContent(ctxt);
13680
Daniel Veillard29b17482004-08-16 00:39:03 +000013681 nsPop(ctxt, nsnr);
13682 if ((RAW == '<') && (NXT(1) == '/')) {
13683 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13684 } else if (RAW != 0) {
13685 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13686 }
13687 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13688 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13689 ctxt->wellFormed = 0;
13690 }
13691
13692 if (!ctxt->wellFormed) {
13693 if (ctxt->errNo == 0)
13694 ret = XML_ERR_INTERNAL_ERROR;
13695 else
13696 ret = (xmlParserErrors)ctxt->errNo;
13697 } else {
13698 ret = XML_ERR_OK;
13699 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013700
Daniel Veillard29b17482004-08-16 00:39:03 +000013701 /*
13702 * Return the newly created nodeset after unlinking it from
13703 * the pseudo sibling.
13704 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013705
Daniel Veillard29b17482004-08-16 00:39:03 +000013706 cur = fake->next;
13707 fake->next = NULL;
13708 node->last = fake;
13709
13710 if (cur != NULL) {
13711 cur->prev = NULL;
13712 }
13713
13714 *lst = cur;
13715
13716 while (cur != NULL) {
13717 cur->parent = NULL;
13718 cur = cur->next;
13719 }
13720
13721 xmlUnlinkNode(fake);
13722 xmlFreeNode(fake);
13723
13724
13725 if (ret != XML_ERR_OK) {
13726 xmlFreeNodeList(*lst);
13727 *lst = NULL;
13728 }
William M. Brackc3f81342004-10-03 01:22:44 +000013729
William M. Brackb7b54de2004-10-06 16:38:01 +000013730 if (doc->dict != NULL)
13731 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013732 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013733
Daniel Veillard29b17482004-08-16 00:39:03 +000013734 return(ret);
13735#else /* !SAX2 */
13736 return(XML_ERR_INTERNAL_ERROR);
13737#endif
13738}
13739
Daniel Veillard81273902003-09-30 00:43:48 +000013740#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013741/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013742 * xmlParseBalancedChunkMemoryRecover:
13743 * @doc: the document the chunk pertains to
13744 * @sax: the SAX handler bloc (possibly NULL)
13745 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13746 * @depth: Used for loop detection, use 0
13747 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13748 * @lst: the return value for the set of parsed nodes
13749 * @recover: return nodes even if the data is broken (use 0)
13750 *
13751 *
13752 * Parse a well-balanced chunk of an XML document
13753 * called by the parser
13754 * The allowed sequence for the Well Balanced Chunk is the one defined by
13755 * the content production in the XML grammar:
13756 *
13757 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13758 *
13759 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13760 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013761 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013762 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013763 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13764 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013765 */
13766int
13767xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013768 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013769 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013770 xmlParserCtxtPtr ctxt;
13771 xmlDocPtr newDoc;
13772 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013773 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013774 int size;
13775 int ret = 0;
13776
Daniel Veillard0161e632008-08-28 15:36:32 +000013777 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013778 return(XML_ERR_ENTITY_LOOP);
13779 }
13780
13781
Daniel Veillardcda96922001-08-21 10:56:31 +000013782 if (lst != NULL)
13783 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013784 if (string == NULL)
13785 return(-1);
13786
13787 size = xmlStrlen(string);
13788
13789 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13790 if (ctxt == NULL) return(-1);
13791 ctxt->userData = ctxt;
13792 if (sax != NULL) {
13793 oldsax = ctxt->sax;
13794 ctxt->sax = sax;
13795 if (user_data != NULL)
13796 ctxt->userData = user_data;
13797 }
13798 newDoc = xmlNewDoc(BAD_CAST "1.0");
13799 if (newDoc == NULL) {
13800 xmlFreeParserCtxt(ctxt);
13801 return(-1);
13802 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013803 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013804 if ((doc != NULL) && (doc->dict != NULL)) {
13805 xmlDictFree(ctxt->dict);
13806 ctxt->dict = doc->dict;
13807 xmlDictReference(ctxt->dict);
13808 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13809 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13810 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13811 ctxt->dictNames = 1;
13812 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013813 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013814 }
Owen Taylor3473f882001-02-23 17:55:21 +000013815 if (doc != NULL) {
13816 newDoc->intSubset = doc->intSubset;
13817 newDoc->extSubset = doc->extSubset;
13818 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013819 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13820 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013821 if (sax != NULL)
13822 ctxt->sax = oldsax;
13823 xmlFreeParserCtxt(ctxt);
13824 newDoc->intSubset = NULL;
13825 newDoc->extSubset = NULL;
13826 xmlFreeDoc(newDoc);
13827 return(-1);
13828 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013829 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13830 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013831 if (doc == NULL) {
13832 ctxt->myDoc = newDoc;
13833 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013834 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013835 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013836 /* Ensure that doc has XML spec namespace */
13837 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13838 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013839 }
13840 ctxt->instate = XML_PARSER_CONTENT;
13841 ctxt->depth = depth;
13842
13843 /*
13844 * Doing validity checking on chunk doesn't make sense
13845 */
13846 ctxt->validate = 0;
13847 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013848 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013849
Daniel Veillardb39bc392002-10-26 19:29:51 +000013850 if ( doc != NULL ){
13851 content = doc->children;
13852 doc->children = NULL;
13853 xmlParseContent(ctxt);
13854 doc->children = content;
13855 }
13856 else {
13857 xmlParseContent(ctxt);
13858 }
Owen Taylor3473f882001-02-23 17:55:21 +000013859 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013860 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013861 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013862 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013863 }
13864 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013865 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013866 }
13867
13868 if (!ctxt->wellFormed) {
13869 if (ctxt->errNo == 0)
13870 ret = 1;
13871 else
13872 ret = ctxt->errNo;
13873 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013874 ret = 0;
13875 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013876
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013877 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13878 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013879
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013880 /*
13881 * Return the newly created nodeset after unlinking it from
13882 * they pseudo parent.
13883 */
13884 cur = newDoc->children->children;
13885 *lst = cur;
13886 while (cur != NULL) {
13887 xmlSetTreeDoc(cur, doc);
13888 cur->parent = NULL;
13889 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013890 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013891 newDoc->children->children = NULL;
13892 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013893
13894 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013895 ctxt->sax = oldsax;
13896 xmlFreeParserCtxt(ctxt);
13897 newDoc->intSubset = NULL;
13898 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013899 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013900 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013901
Owen Taylor3473f882001-02-23 17:55:21 +000013902 return(ret);
13903}
13904
13905/**
13906 * xmlSAXParseEntity:
13907 * @sax: the SAX handler block
13908 * @filename: the filename
13909 *
13910 * parse an XML external entity out of context and build a tree.
13911 * It use the given SAX function block to handle the parsing callback.
13912 * If sax is NULL, fallback to the default DOM tree building routines.
13913 *
13914 * [78] extParsedEnt ::= TextDecl? content
13915 *
13916 * This correspond to a "Well Balanced" chunk
13917 *
13918 * Returns the resulting document tree
13919 */
13920
13921xmlDocPtr
13922xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13923 xmlDocPtr ret;
13924 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013925
13926 ctxt = xmlCreateFileParserCtxt(filename);
13927 if (ctxt == NULL) {
13928 return(NULL);
13929 }
13930 if (sax != NULL) {
13931 if (ctxt->sax != NULL)
13932 xmlFree(ctxt->sax);
13933 ctxt->sax = sax;
13934 ctxt->userData = NULL;
13935 }
13936
Owen Taylor3473f882001-02-23 17:55:21 +000013937 xmlParseExtParsedEnt(ctxt);
13938
13939 if (ctxt->wellFormed)
13940 ret = ctxt->myDoc;
13941 else {
13942 ret = NULL;
13943 xmlFreeDoc(ctxt->myDoc);
13944 ctxt->myDoc = NULL;
13945 }
13946 if (sax != NULL)
13947 ctxt->sax = NULL;
13948 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013949
Owen Taylor3473f882001-02-23 17:55:21 +000013950 return(ret);
13951}
13952
13953/**
13954 * xmlParseEntity:
13955 * @filename: the filename
13956 *
13957 * parse an XML external entity out of context and build a tree.
13958 *
13959 * [78] extParsedEnt ::= TextDecl? content
13960 *
13961 * This correspond to a "Well Balanced" chunk
13962 *
13963 * Returns the resulting document tree
13964 */
13965
13966xmlDocPtr
13967xmlParseEntity(const char *filename) {
13968 return(xmlSAXParseEntity(NULL, filename));
13969}
Daniel Veillard81273902003-09-30 00:43:48 +000013970#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013971
13972/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013973 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013974 * @URL: the entity URL
13975 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013976 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013977 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013978 *
13979 * Create a parser context for an external entity
13980 * Automatic support for ZLIB/Compress compressed document is provided
13981 * by default if found at compile-time.
13982 *
13983 * Returns the new parser context or NULL
13984 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013985static xmlParserCtxtPtr
13986xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13987 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013988 xmlParserCtxtPtr ctxt;
13989 xmlParserInputPtr inputStream;
13990 char *directory = NULL;
13991 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013992
Owen Taylor3473f882001-02-23 17:55:21 +000013993 ctxt = xmlNewParserCtxt();
13994 if (ctxt == NULL) {
13995 return(NULL);
13996 }
13997
Daniel Veillard48247b42009-07-10 16:12:46 +020013998 if (pctx != NULL) {
13999 ctxt->options = pctx->options;
14000 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014001 }
14002
Owen Taylor3473f882001-02-23 17:55:21 +000014003 uri = xmlBuildURI(URL, base);
14004
14005 if (uri == NULL) {
14006 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14007 if (inputStream == NULL) {
14008 xmlFreeParserCtxt(ctxt);
14009 return(NULL);
14010 }
14011
14012 inputPush(ctxt, inputStream);
14013
14014 if ((ctxt->directory == NULL) && (directory == NULL))
14015 directory = xmlParserGetDirectory((char *)URL);
14016 if ((ctxt->directory == NULL) && (directory != NULL))
14017 ctxt->directory = directory;
14018 } else {
14019 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14020 if (inputStream == NULL) {
14021 xmlFree(uri);
14022 xmlFreeParserCtxt(ctxt);
14023 return(NULL);
14024 }
14025
14026 inputPush(ctxt, inputStream);
14027
14028 if ((ctxt->directory == NULL) && (directory == NULL))
14029 directory = xmlParserGetDirectory((char *)uri);
14030 if ((ctxt->directory == NULL) && (directory != NULL))
14031 ctxt->directory = directory;
14032 xmlFree(uri);
14033 }
Owen Taylor3473f882001-02-23 17:55:21 +000014034 return(ctxt);
14035}
14036
Rob Richards9c0aa472009-03-26 18:10:19 +000014037/**
14038 * xmlCreateEntityParserCtxt:
14039 * @URL: the entity URL
14040 * @ID: the entity PUBLIC ID
14041 * @base: a possible base for the target URI
14042 *
14043 * Create a parser context for an external entity
14044 * Automatic support for ZLIB/Compress compressed document is provided
14045 * by default if found at compile-time.
14046 *
14047 * Returns the new parser context or NULL
14048 */
14049xmlParserCtxtPtr
14050xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14051 const xmlChar *base) {
14052 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14053
14054}
14055
Owen Taylor3473f882001-02-23 17:55:21 +000014056/************************************************************************
14057 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014058 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014059 * *
14060 ************************************************************************/
14061
14062/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014063 * xmlCreateURLParserCtxt:
14064 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014065 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014066 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014067 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014068 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014069 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014070 *
14071 * Returns the new parser context or NULL
14072 */
14073xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014074xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014075{
14076 xmlParserCtxtPtr ctxt;
14077 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014078 char *directory = NULL;
14079
Owen Taylor3473f882001-02-23 17:55:21 +000014080 ctxt = xmlNewParserCtxt();
14081 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014082 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014083 return(NULL);
14084 }
14085
Daniel Veillarddf292f72005-01-16 19:00:15 +000014086 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014087 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014088 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014089
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014090 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014091 if (inputStream == NULL) {
14092 xmlFreeParserCtxt(ctxt);
14093 return(NULL);
14094 }
14095
Owen Taylor3473f882001-02-23 17:55:21 +000014096 inputPush(ctxt, inputStream);
14097 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014098 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014099 if ((ctxt->directory == NULL) && (directory != NULL))
14100 ctxt->directory = directory;
14101
14102 return(ctxt);
14103}
14104
Daniel Veillard61b93382003-11-03 14:28:31 +000014105/**
14106 * xmlCreateFileParserCtxt:
14107 * @filename: the filename
14108 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014109 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014110 * Automatic support for ZLIB/Compress compressed document is provided
14111 * by default if found at compile-time.
14112 *
14113 * Returns the new parser context or NULL
14114 */
14115xmlParserCtxtPtr
14116xmlCreateFileParserCtxt(const char *filename)
14117{
14118 return(xmlCreateURLParserCtxt(filename, 0));
14119}
14120
Daniel Veillard81273902003-09-30 00:43:48 +000014121#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014122/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014123 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014124 * @sax: the SAX handler block
14125 * @filename: the filename
14126 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14127 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014128 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014129 *
14130 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14131 * compressed document is provided by default if found at compile-time.
14132 * It use the given SAX function block to handle the parsing callback.
14133 * If sax is NULL, fallback to the default DOM tree building routines.
14134 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014135 * User data (void *) is stored within the parser context in the
14136 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014137 *
Owen Taylor3473f882001-02-23 17:55:21 +000014138 * Returns the resulting document tree
14139 */
14140
14141xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014142xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14143 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014144 xmlDocPtr ret;
14145 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014146
Daniel Veillard635ef722001-10-29 11:48:19 +000014147 xmlInitParser();
14148
Owen Taylor3473f882001-02-23 17:55:21 +000014149 ctxt = xmlCreateFileParserCtxt(filename);
14150 if (ctxt == NULL) {
14151 return(NULL);
14152 }
14153 if (sax != NULL) {
14154 if (ctxt->sax != NULL)
14155 xmlFree(ctxt->sax);
14156 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014157 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014158 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014159 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014160 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014161 }
Owen Taylor3473f882001-02-23 17:55:21 +000014162
Daniel Veillard37d2d162008-03-14 10:54:00 +000014163 if (ctxt->directory == NULL)
14164 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014165
Daniel Veillarddad3f682002-11-17 16:47:27 +000014166 ctxt->recovery = recovery;
14167
Owen Taylor3473f882001-02-23 17:55:21 +000014168 xmlParseDocument(ctxt);
14169
William M. Brackc07329e2003-09-08 01:57:30 +000014170 if ((ctxt->wellFormed) || recovery) {
14171 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014172 if (ret != NULL) {
14173 if (ctxt->input->buf->compressed > 0)
14174 ret->compression = 9;
14175 else
14176 ret->compression = ctxt->input->buf->compressed;
14177 }
William M. Brackc07329e2003-09-08 01:57:30 +000014178 }
Owen Taylor3473f882001-02-23 17:55:21 +000014179 else {
14180 ret = NULL;
14181 xmlFreeDoc(ctxt->myDoc);
14182 ctxt->myDoc = NULL;
14183 }
14184 if (sax != NULL)
14185 ctxt->sax = NULL;
14186 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014187
Owen Taylor3473f882001-02-23 17:55:21 +000014188 return(ret);
14189}
14190
14191/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014192 * xmlSAXParseFile:
14193 * @sax: the SAX handler block
14194 * @filename: the filename
14195 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14196 * documents
14197 *
14198 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14199 * compressed document is provided by default if found at compile-time.
14200 * It use the given SAX function block to handle the parsing callback.
14201 * If sax is NULL, fallback to the default DOM tree building routines.
14202 *
14203 * Returns the resulting document tree
14204 */
14205
14206xmlDocPtr
14207xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14208 int recovery) {
14209 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14210}
14211
14212/**
Owen Taylor3473f882001-02-23 17:55:21 +000014213 * xmlRecoverDoc:
14214 * @cur: a pointer to an array of xmlChar
14215 *
14216 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014217 * In the case the document is not Well Formed, a attempt to build a
14218 * tree is tried anyway
14219 *
14220 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014221 */
14222
14223xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014224xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014225 return(xmlSAXParseDoc(NULL, cur, 1));
14226}
14227
14228/**
14229 * xmlParseFile:
14230 * @filename: the filename
14231 *
14232 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14233 * compressed document is provided by default if found at compile-time.
14234 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014235 * Returns the resulting document tree if the file was wellformed,
14236 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014237 */
14238
14239xmlDocPtr
14240xmlParseFile(const char *filename) {
14241 return(xmlSAXParseFile(NULL, filename, 0));
14242}
14243
14244/**
14245 * xmlRecoverFile:
14246 * @filename: the filename
14247 *
14248 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14249 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014250 * In the case the document is not Well Formed, it attempts to build
14251 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014252 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014253 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014254 */
14255
14256xmlDocPtr
14257xmlRecoverFile(const char *filename) {
14258 return(xmlSAXParseFile(NULL, filename, 1));
14259}
14260
14261
14262/**
14263 * xmlSetupParserForBuffer:
14264 * @ctxt: an XML parser context
14265 * @buffer: a xmlChar * buffer
14266 * @filename: a file name
14267 *
14268 * Setup the parser context to parse a new buffer; Clears any prior
14269 * contents from the parser context. The buffer parameter must not be
14270 * NULL, but the filename parameter can be
14271 */
14272void
14273xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14274 const char* filename)
14275{
14276 xmlParserInputPtr input;
14277
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014278 if ((ctxt == NULL) || (buffer == NULL))
14279 return;
14280
Owen Taylor3473f882001-02-23 17:55:21 +000014281 input = xmlNewInputStream(ctxt);
14282 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014283 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014284 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014285 return;
14286 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014287
Owen Taylor3473f882001-02-23 17:55:21 +000014288 xmlClearParserCtxt(ctxt);
14289 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014290 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014291 input->base = buffer;
14292 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014293 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014294 inputPush(ctxt, input);
14295}
14296
14297/**
14298 * xmlSAXUserParseFile:
14299 * @sax: a SAX handler
14300 * @user_data: The user data returned on SAX callbacks
14301 * @filename: a file name
14302 *
14303 * parse an XML file and call the given SAX handler routines.
14304 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014305 *
Owen Taylor3473f882001-02-23 17:55:21 +000014306 * Returns 0 in case of success or a error number otherwise
14307 */
14308int
14309xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14310 const char *filename) {
14311 int ret = 0;
14312 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014313
Owen Taylor3473f882001-02-23 17:55:21 +000014314 ctxt = xmlCreateFileParserCtxt(filename);
14315 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014316 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014317 xmlFree(ctxt->sax);
14318 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014319 xmlDetectSAX2(ctxt);
14320
Owen Taylor3473f882001-02-23 17:55:21 +000014321 if (user_data != NULL)
14322 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014323
Owen Taylor3473f882001-02-23 17:55:21 +000014324 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014325
Owen Taylor3473f882001-02-23 17:55:21 +000014326 if (ctxt->wellFormed)
14327 ret = 0;
14328 else {
14329 if (ctxt->errNo != 0)
14330 ret = ctxt->errNo;
14331 else
14332 ret = -1;
14333 }
14334 if (sax != NULL)
14335 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014336 if (ctxt->myDoc != NULL) {
14337 xmlFreeDoc(ctxt->myDoc);
14338 ctxt->myDoc = NULL;
14339 }
Owen Taylor3473f882001-02-23 17:55:21 +000014340 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014341
Owen Taylor3473f882001-02-23 17:55:21 +000014342 return ret;
14343}
Daniel Veillard81273902003-09-30 00:43:48 +000014344#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014345
14346/************************************************************************
14347 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014348 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014349 * *
14350 ************************************************************************/
14351
14352/**
14353 * xmlCreateMemoryParserCtxt:
14354 * @buffer: a pointer to a char array
14355 * @size: the size of the array
14356 *
14357 * Create a parser context for an XML in-memory document.
14358 *
14359 * Returns the new parser context or NULL
14360 */
14361xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014362xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014363 xmlParserCtxtPtr ctxt;
14364 xmlParserInputPtr input;
14365 xmlParserInputBufferPtr buf;
14366
14367 if (buffer == NULL)
14368 return(NULL);
14369 if (size <= 0)
14370 return(NULL);
14371
14372 ctxt = xmlNewParserCtxt();
14373 if (ctxt == NULL)
14374 return(NULL);
14375
Daniel Veillard53350552003-09-18 13:35:51 +000014376 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014377 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014378 if (buf == NULL) {
14379 xmlFreeParserCtxt(ctxt);
14380 return(NULL);
14381 }
Owen Taylor3473f882001-02-23 17:55:21 +000014382
14383 input = xmlNewInputStream(ctxt);
14384 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014385 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014386 xmlFreeParserCtxt(ctxt);
14387 return(NULL);
14388 }
14389
14390 input->filename = NULL;
14391 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014392 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014393
14394 inputPush(ctxt, input);
14395 return(ctxt);
14396}
14397
Daniel Veillard81273902003-09-30 00:43:48 +000014398#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014399/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014400 * xmlSAXParseMemoryWithData:
14401 * @sax: the SAX handler block
14402 * @buffer: an pointer to a char array
14403 * @size: the size of the array
14404 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14405 * documents
14406 * @data: the userdata
14407 *
14408 * parse an XML in-memory block and use the given SAX function block
14409 * to handle the parsing callback. If sax is NULL, fallback to the default
14410 * DOM tree building routines.
14411 *
14412 * User data (void *) is stored within the parser context in the
14413 * context's _private member, so it is available nearly everywhere in libxml
14414 *
14415 * Returns the resulting document tree
14416 */
14417
14418xmlDocPtr
14419xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14420 int size, int recovery, void *data) {
14421 xmlDocPtr ret;
14422 xmlParserCtxtPtr ctxt;
14423
Daniel Veillardab2a7632009-07-09 08:45:03 +020014424 xmlInitParser();
14425
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014426 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14427 if (ctxt == NULL) return(NULL);
14428 if (sax != NULL) {
14429 if (ctxt->sax != NULL)
14430 xmlFree(ctxt->sax);
14431 ctxt->sax = sax;
14432 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014433 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014434 if (data!=NULL) {
14435 ctxt->_private=data;
14436 }
14437
Daniel Veillardadba5f12003-04-04 16:09:01 +000014438 ctxt->recovery = recovery;
14439
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014440 xmlParseDocument(ctxt);
14441
14442 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14443 else {
14444 ret = NULL;
14445 xmlFreeDoc(ctxt->myDoc);
14446 ctxt->myDoc = NULL;
14447 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014448 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014449 ctxt->sax = NULL;
14450 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014451
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014452 return(ret);
14453}
14454
14455/**
Owen Taylor3473f882001-02-23 17:55:21 +000014456 * xmlSAXParseMemory:
14457 * @sax: the SAX handler block
14458 * @buffer: an pointer to a char array
14459 * @size: the size of the array
14460 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14461 * documents
14462 *
14463 * parse an XML in-memory block and use the given SAX function block
14464 * to handle the parsing callback. If sax is NULL, fallback to the default
14465 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014466 *
Owen Taylor3473f882001-02-23 17:55:21 +000014467 * Returns the resulting document tree
14468 */
14469xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014470xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14471 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014472 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014473}
14474
14475/**
14476 * xmlParseMemory:
14477 * @buffer: an pointer to a char array
14478 * @size: the size of the array
14479 *
14480 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014481 *
Owen Taylor3473f882001-02-23 17:55:21 +000014482 * Returns the resulting document tree
14483 */
14484
Daniel Veillard50822cb2001-07-26 20:05:51 +000014485xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014486 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14487}
14488
14489/**
14490 * xmlRecoverMemory:
14491 * @buffer: an pointer to a char array
14492 * @size: the size of the array
14493 *
14494 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014495 * In the case the document is not Well Formed, an attempt to
14496 * build a tree is tried anyway
14497 *
14498 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014499 */
14500
Daniel Veillard50822cb2001-07-26 20:05:51 +000014501xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014502 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14503}
14504
14505/**
14506 * xmlSAXUserParseMemory:
14507 * @sax: a SAX handler
14508 * @user_data: The user data returned on SAX callbacks
14509 * @buffer: an in-memory XML document input
14510 * @size: the length of the XML document in bytes
14511 *
14512 * A better SAX parsing routine.
14513 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014514 *
Owen Taylor3473f882001-02-23 17:55:21 +000014515 * Returns 0 in case of success or a error number otherwise
14516 */
14517int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014518 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014519 int ret = 0;
14520 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014521
14522 xmlInitParser();
14523
Owen Taylor3473f882001-02-23 17:55:21 +000014524 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14525 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014526 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14527 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014528 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014529 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014530
Daniel Veillard30211a02001-04-26 09:33:18 +000014531 if (user_data != NULL)
14532 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014533
Owen Taylor3473f882001-02-23 17:55:21 +000014534 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014535
Owen Taylor3473f882001-02-23 17:55:21 +000014536 if (ctxt->wellFormed)
14537 ret = 0;
14538 else {
14539 if (ctxt->errNo != 0)
14540 ret = ctxt->errNo;
14541 else
14542 ret = -1;
14543 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014544 if (sax != NULL)
14545 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014546 if (ctxt->myDoc != NULL) {
14547 xmlFreeDoc(ctxt->myDoc);
14548 ctxt->myDoc = NULL;
14549 }
Owen Taylor3473f882001-02-23 17:55:21 +000014550 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014551
Owen Taylor3473f882001-02-23 17:55:21 +000014552 return ret;
14553}
Daniel Veillard81273902003-09-30 00:43:48 +000014554#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014555
14556/**
14557 * xmlCreateDocParserCtxt:
14558 * @cur: a pointer to an array of xmlChar
14559 *
14560 * Creates a parser context for an XML in-memory document.
14561 *
14562 * Returns the new parser context or NULL
14563 */
14564xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014565xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014566 int len;
14567
14568 if (cur == NULL)
14569 return(NULL);
14570 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014571 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014572}
14573
Daniel Veillard81273902003-09-30 00:43:48 +000014574#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014575/**
14576 * xmlSAXParseDoc:
14577 * @sax: the SAX handler block
14578 * @cur: a pointer to an array of xmlChar
14579 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14580 * documents
14581 *
14582 * parse an XML in-memory document and build a tree.
14583 * It use the given SAX function block to handle the parsing callback.
14584 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014585 *
Owen Taylor3473f882001-02-23 17:55:21 +000014586 * Returns the resulting document tree
14587 */
14588
14589xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014590xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014591 xmlDocPtr ret;
14592 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014593 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014594
Daniel Veillard38936062004-11-04 17:45:11 +000014595 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014596
14597
14598 ctxt = xmlCreateDocParserCtxt(cur);
14599 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014600 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014601 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014602 ctxt->sax = sax;
14603 ctxt->userData = NULL;
14604 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014605 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014606
14607 xmlParseDocument(ctxt);
14608 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14609 else {
14610 ret = NULL;
14611 xmlFreeDoc(ctxt->myDoc);
14612 ctxt->myDoc = NULL;
14613 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014614 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014615 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014616 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014617
Owen Taylor3473f882001-02-23 17:55:21 +000014618 return(ret);
14619}
14620
14621/**
14622 * xmlParseDoc:
14623 * @cur: a pointer to an array of xmlChar
14624 *
14625 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014626 *
Owen Taylor3473f882001-02-23 17:55:21 +000014627 * Returns the resulting document tree
14628 */
14629
14630xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014631xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014632 return(xmlSAXParseDoc(NULL, cur, 0));
14633}
Daniel Veillard81273902003-09-30 00:43:48 +000014634#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014635
Daniel Veillard81273902003-09-30 00:43:48 +000014636#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014637/************************************************************************
14638 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014639 * Specific function to keep track of entities references *
14640 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014641 * *
14642 ************************************************************************/
14643
14644static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14645
14646/**
14647 * xmlAddEntityReference:
14648 * @ent : A valid entity
14649 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014650 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014651 *
14652 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14653 */
14654static void
14655xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14656 xmlNodePtr lastNode)
14657{
14658 if (xmlEntityRefFunc != NULL) {
14659 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14660 }
14661}
14662
14663
14664/**
14665 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014666 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014667 *
14668 * Set the function to call call back when a xml reference has been made
14669 */
14670void
14671xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14672{
14673 xmlEntityRefFunc = func;
14674}
Daniel Veillard81273902003-09-30 00:43:48 +000014675#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014676
14677/************************************************************************
14678 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014679 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014680 * *
14681 ************************************************************************/
14682
14683#ifdef LIBXML_XPATH_ENABLED
14684#include <libxml/xpath.h>
14685#endif
14686
Daniel Veillardffa3c742005-07-21 13:24:09 +000014687extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014688static int xmlParserInitialized = 0;
14689
14690/**
14691 * xmlInitParser:
14692 *
14693 * Initialization function for the XML parser.
14694 * This is not reentrant. Call once before processing in case of
14695 * use in multithreaded programs.
14696 */
14697
14698void
14699xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014700 if (xmlParserInitialized != 0)
14701 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014702
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014703#ifdef LIBXML_THREAD_ENABLED
14704 __xmlGlobalInitMutexLock();
14705 if (xmlParserInitialized == 0) {
14706#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014707 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014708 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014709 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14710 (xmlGenericError == NULL))
14711 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014712 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014713 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014714 xmlInitCharEncodingHandlers();
14715 xmlDefaultSAXHandlerInit();
14716 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014717#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014718 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014719#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014720#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014721 htmlInitAutoClose();
14722 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014723#endif
14724#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014725 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014726#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014727 xmlParserInitialized = 1;
14728#ifdef LIBXML_THREAD_ENABLED
14729 }
14730 __xmlGlobalInitMutexUnlock();
14731#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014732}
14733
14734/**
14735 * xmlCleanupParser:
14736 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014737 * This function name is somewhat misleading. It does not clean up
14738 * parser state, it cleans up memory allocated by the library itself.
14739 * It is a cleanup function for the XML library. It tries to reclaim all
14740 * related global memory allocated for the library processing.
14741 * It doesn't deallocate any document related memory. One should
14742 * call xmlCleanupParser() only when the process has finished using
14743 * the library and all XML/HTML documents built with it.
14744 * See also xmlInitParser() which has the opposite function of preparing
14745 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014746 *
14747 * WARNING: if your application is multithreaded or has plugin support
14748 * calling this may crash the application if another thread or
14749 * a plugin is still using libxml2. It's sometimes very hard to
14750 * guess if libxml2 is in use in the application, some libraries
14751 * or plugins may use it without notice. In case of doubt abstain
14752 * from calling this function or do it just before calling exit()
14753 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014754 */
14755
14756void
14757xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014758 if (!xmlParserInitialized)
14759 return;
14760
Owen Taylor3473f882001-02-23 17:55:21 +000014761 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014762#ifdef LIBXML_CATALOG_ENABLED
14763 xmlCatalogCleanup();
14764#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014765 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014766 xmlCleanupInputCallbacks();
14767#ifdef LIBXML_OUTPUT_ENABLED
14768 xmlCleanupOutputCallbacks();
14769#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014770#ifdef LIBXML_SCHEMAS_ENABLED
14771 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014772 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014773#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014774 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014775 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014776 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014777 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014778 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014779}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014780
14781/************************************************************************
14782 * *
14783 * New set (2.6.0) of simpler and more flexible APIs *
14784 * *
14785 ************************************************************************/
14786
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used; it may be NULL, in which case any non-NULL @str is freed.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else); invoke it with a trailing
 * semicolon. @str is evaluated more than once, so it must not have side
 * effects.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14798
14799/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014800 * xmlCtxtReset:
14801 * @ctxt: an XML parser context
14802 *
14803 * Reset a parser context
14804 */
14805void
14806xmlCtxtReset(xmlParserCtxtPtr ctxt)
14807{
14808 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014809 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014810
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014811 if (ctxt == NULL)
14812 return;
14813
14814 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014815
14816 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14817 xmlFreeInputStream(input);
14818 }
14819 ctxt->inputNr = 0;
14820 ctxt->input = NULL;
14821
14822 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014823 if (ctxt->spaceTab != NULL) {
14824 ctxt->spaceTab[0] = -1;
14825 ctxt->space = &ctxt->spaceTab[0];
14826 } else {
14827 ctxt->space = NULL;
14828 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014829
14830
14831 ctxt->nodeNr = 0;
14832 ctxt->node = NULL;
14833
14834 ctxt->nameNr = 0;
14835 ctxt->name = NULL;
14836
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014837 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014838 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014839 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014840 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014841 DICT_FREE(ctxt->directory);
14842 ctxt->directory = NULL;
14843 DICT_FREE(ctxt->extSubURI);
14844 ctxt->extSubURI = NULL;
14845 DICT_FREE(ctxt->extSubSystem);
14846 ctxt->extSubSystem = NULL;
14847 if (ctxt->myDoc != NULL)
14848 xmlFreeDoc(ctxt->myDoc);
14849 ctxt->myDoc = NULL;
14850
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014851 ctxt->standalone = -1;
14852 ctxt->hasExternalSubset = 0;
14853 ctxt->hasPErefs = 0;
14854 ctxt->html = 0;
14855 ctxt->external = 0;
14856 ctxt->instate = XML_PARSER_START;
14857 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014858
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014859 ctxt->wellFormed = 1;
14860 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014861 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014862 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014863#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014864 ctxt->vctxt.userData = ctxt;
14865 ctxt->vctxt.error = xmlParserValidityError;
14866 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014867#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014868 ctxt->record_info = 0;
14869 ctxt->nbChars = 0;
14870 ctxt->checkIndex = 0;
14871 ctxt->inSubset = 0;
14872 ctxt->errNo = XML_ERR_OK;
14873 ctxt->depth = 0;
14874 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14875 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014876 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014877 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014878 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014879 xmlInitNodeInfoSeq(&ctxt->node_seq);
14880
14881 if (ctxt->attsDefault != NULL) {
14882 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14883 ctxt->attsDefault = NULL;
14884 }
14885 if (ctxt->attsSpecial != NULL) {
14886 xmlHashFree(ctxt->attsSpecial, NULL);
14887 ctxt->attsSpecial = NULL;
14888 }
14889
Daniel Veillard4432df22003-09-28 18:58:27 +000014890#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014891 if (ctxt->catalogs != NULL)
14892 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014893#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014894 if (ctxt->lastError.code != XML_ERR_OK)
14895 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014896}
14897
14898/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014899 * xmlCtxtResetPush:
14900 * @ctxt: an XML parser context
14901 * @chunk: a pointer to an array of chars
14902 * @size: number of chars in the array
14903 * @filename: an optional file name or URI
14904 * @encoding: the document encoding, or NULL
14905 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014906 * Reset a push parser context
14907 *
14908 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014909 */
14910int
14911xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14912 int size, const char *filename, const char *encoding)
14913{
14914 xmlParserInputPtr inputStream;
14915 xmlParserInputBufferPtr buf;
14916 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14917
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014918 if (ctxt == NULL)
14919 return(1);
14920
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014921 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14922 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14923
14924 buf = xmlAllocParserInputBuffer(enc);
14925 if (buf == NULL)
14926 return(1);
14927
14928 if (ctxt == NULL) {
14929 xmlFreeParserInputBuffer(buf);
14930 return(1);
14931 }
14932
14933 xmlCtxtReset(ctxt);
14934
14935 if (ctxt->pushTab == NULL) {
14936 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14937 sizeof(xmlChar *));
14938 if (ctxt->pushTab == NULL) {
14939 xmlErrMemory(ctxt, NULL);
14940 xmlFreeParserInputBuffer(buf);
14941 return(1);
14942 }
14943 }
14944
14945 if (filename == NULL) {
14946 ctxt->directory = NULL;
14947 } else {
14948 ctxt->directory = xmlParserGetDirectory(filename);
14949 }
14950
14951 inputStream = xmlNewInputStream(ctxt);
14952 if (inputStream == NULL) {
14953 xmlFreeParserInputBuffer(buf);
14954 return(1);
14955 }
14956
14957 if (filename == NULL)
14958 inputStream->filename = NULL;
14959 else
14960 inputStream->filename = (char *)
14961 xmlCanonicPath((const xmlChar *) filename);
14962 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014963 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014964
14965 inputPush(ctxt, inputStream);
14966
14967 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14968 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014969 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14970 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014971
14972 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14973
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014974 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014975#ifdef DEBUG_PUSH
14976 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14977#endif
14978 }
14979
14980 if (encoding != NULL) {
14981 xmlCharEncodingHandlerPtr hdlr;
14982
Daniel Veillard37334572008-07-31 08:20:02 +000014983 if (ctxt->encoding != NULL)
14984 xmlFree((xmlChar *) ctxt->encoding);
14985 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14986
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014987 hdlr = xmlFindCharEncodingHandler(encoding);
14988 if (hdlr != NULL) {
14989 xmlSwitchToEncoding(ctxt, hdlr);
14990 } else {
14991 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14992 "Unsupported encoding %s\n", BAD_CAST encoding);
14993 }
14994 } else if (enc != XML_CHAR_ENCODING_NONE) {
14995 xmlSwitchEncoding(ctxt, enc);
14996 }
14997
14998 return(0);
14999}
15000
Daniel Veillard37334572008-07-31 08:20:02 +000015001
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015002/**
Daniel Veillard37334572008-07-31 08:20:02 +000015003 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015004 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015005 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015006 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015007 *
15008 * Applies the options to the parser context
15009 *
15010 * Returns 0 in case of success, the set of unknown or unimplemented options
15011 * in case of error.
15012 */
Daniel Veillard37334572008-07-31 08:20:02 +000015013static int
15014xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015015{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015016 if (ctxt == NULL)
15017 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015018 if (encoding != NULL) {
15019 if (ctxt->encoding != NULL)
15020 xmlFree((xmlChar *) ctxt->encoding);
15021 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15022 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015023 if (options & XML_PARSE_RECOVER) {
15024 ctxt->recovery = 1;
15025 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015026 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015027 } else
15028 ctxt->recovery = 0;
15029 if (options & XML_PARSE_DTDLOAD) {
15030 ctxt->loadsubset = XML_DETECT_IDS;
15031 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015032 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015033 } else
15034 ctxt->loadsubset = 0;
15035 if (options & XML_PARSE_DTDATTR) {
15036 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15037 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015038 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015039 }
15040 if (options & XML_PARSE_NOENT) {
15041 ctxt->replaceEntities = 1;
15042 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15043 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015044 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015045 } else
15046 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015047 if (options & XML_PARSE_PEDANTIC) {
15048 ctxt->pedantic = 1;
15049 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015050 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015051 } else
15052 ctxt->pedantic = 0;
15053 if (options & XML_PARSE_NOBLANKS) {
15054 ctxt->keepBlanks = 0;
15055 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15056 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015057 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015058 } else
15059 ctxt->keepBlanks = 1;
15060 if (options & XML_PARSE_DTDVALID) {
15061 ctxt->validate = 1;
15062 if (options & XML_PARSE_NOWARNING)
15063 ctxt->vctxt.warning = NULL;
15064 if (options & XML_PARSE_NOERROR)
15065 ctxt->vctxt.error = NULL;
15066 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015067 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015068 } else
15069 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015070 if (options & XML_PARSE_NOWARNING) {
15071 ctxt->sax->warning = NULL;
15072 options -= XML_PARSE_NOWARNING;
15073 }
15074 if (options & XML_PARSE_NOERROR) {
15075 ctxt->sax->error = NULL;
15076 ctxt->sax->fatalError = NULL;
15077 options -= XML_PARSE_NOERROR;
15078 }
Daniel Veillard81273902003-09-30 00:43:48 +000015079#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015080 if (options & XML_PARSE_SAX1) {
15081 ctxt->sax->startElement = xmlSAX2StartElement;
15082 ctxt->sax->endElement = xmlSAX2EndElement;
15083 ctxt->sax->startElementNs = NULL;
15084 ctxt->sax->endElementNs = NULL;
15085 ctxt->sax->initialized = 1;
15086 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015087 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015088 }
Daniel Veillard81273902003-09-30 00:43:48 +000015089#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015090 if (options & XML_PARSE_NODICT) {
15091 ctxt->dictNames = 0;
15092 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015093 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015094 } else {
15095 ctxt->dictNames = 1;
15096 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015097 if (options & XML_PARSE_NOCDATA) {
15098 ctxt->sax->cdataBlock = NULL;
15099 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015100 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015101 }
15102 if (options & XML_PARSE_NSCLEAN) {
15103 ctxt->options |= XML_PARSE_NSCLEAN;
15104 options -= XML_PARSE_NSCLEAN;
15105 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015106 if (options & XML_PARSE_NONET) {
15107 ctxt->options |= XML_PARSE_NONET;
15108 options -= XML_PARSE_NONET;
15109 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015110 if (options & XML_PARSE_COMPACT) {
15111 ctxt->options |= XML_PARSE_COMPACT;
15112 options -= XML_PARSE_COMPACT;
15113 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015114 if (options & XML_PARSE_OLD10) {
15115 ctxt->options |= XML_PARSE_OLD10;
15116 options -= XML_PARSE_OLD10;
15117 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015118 if (options & XML_PARSE_NOBASEFIX) {
15119 ctxt->options |= XML_PARSE_NOBASEFIX;
15120 options -= XML_PARSE_NOBASEFIX;
15121 }
15122 if (options & XML_PARSE_HUGE) {
15123 ctxt->options |= XML_PARSE_HUGE;
15124 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015125 if (ctxt->dict != NULL)
15126 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015127 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015128 if (options & XML_PARSE_OLDSAX) {
15129 ctxt->options |= XML_PARSE_OLDSAX;
15130 options -= XML_PARSE_OLDSAX;
15131 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015132 if (options & XML_PARSE_IGNORE_ENC) {
15133 ctxt->options |= XML_PARSE_IGNORE_ENC;
15134 options -= XML_PARSE_IGNORE_ENC;
15135 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015136 if (options & XML_PARSE_BIG_LINES) {
15137 ctxt->options |= XML_PARSE_BIG_LINES;
15138 options -= XML_PARSE_BIG_LINES;
15139 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015140 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015141 return (options);
15142}
15143
15144/**
Daniel Veillard37334572008-07-31 08:20:02 +000015145 * xmlCtxtUseOptions:
15146 * @ctxt: an XML parser context
15147 * @options: a combination of xmlParserOption
15148 *
15149 * Applies the options to the parser context
15150 *
15151 * Returns 0 in case of success, the set of unknown or unimplemented options
15152 * in case of error.
15153 */
15154int
15155xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15156{
15157 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15158}
15159
15160/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015161 * xmlDoRead:
15162 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015163 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015164 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015165 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015166 * @reuse: keep the context for reuse
15167 *
15168 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015169 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015170 * Returns the resulting document tree or NULL
15171 */
15172static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015173xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15174 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015175{
15176 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015177
15178 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015179 if (encoding != NULL) {
15180 xmlCharEncodingHandlerPtr hdlr;
15181
15182 hdlr = xmlFindCharEncodingHandler(encoding);
15183 if (hdlr != NULL)
15184 xmlSwitchToEncoding(ctxt, hdlr);
15185 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015186 if ((URL != NULL) && (ctxt->input != NULL) &&
15187 (ctxt->input->filename == NULL))
15188 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015189 xmlParseDocument(ctxt);
15190 if ((ctxt->wellFormed) || ctxt->recovery)
15191 ret = ctxt->myDoc;
15192 else {
15193 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015194 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015195 xmlFreeDoc(ctxt->myDoc);
15196 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015197 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015198 ctxt->myDoc = NULL;
15199 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015200 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015201 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015202
15203 return (ret);
15204}
15205
15206/**
15207 * xmlReadDoc:
15208 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015209 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015210 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015211 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015212 *
15213 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015214 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015215 * Returns the resulting document tree
15216 */
15217xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015218xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015219{
15220 xmlParserCtxtPtr ctxt;
15221
15222 if (cur == NULL)
15223 return (NULL);
15224
15225 ctxt = xmlCreateDocParserCtxt(cur);
15226 if (ctxt == NULL)
15227 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015228 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015229}
15230
15231/**
15232 * xmlReadFile:
15233 * @filename: a file or URL
15234 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015235 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015236 *
15237 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015238 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015239 * Returns the resulting document tree
15240 */
15241xmlDocPtr
15242xmlReadFile(const char *filename, const char *encoding, int options)
15243{
15244 xmlParserCtxtPtr ctxt;
15245
Daniel Veillard61b93382003-11-03 14:28:31 +000015246 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247 if (ctxt == NULL)
15248 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015249 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015250}
15251
15252/**
15253 * xmlReadMemory:
15254 * @buffer: a pointer to a char array
15255 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015256 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015257 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015258 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015259 *
15260 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015261 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015262 * Returns the resulting document tree
15263 */
15264xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015265xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266{
15267 xmlParserCtxtPtr ctxt;
15268
15269 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15270 if (ctxt == NULL)
15271 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015272 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015273}
15274
15275/**
15276 * xmlReadFd:
15277 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015278 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015279 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015280 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015281 *
15282 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015283 * NOTE that the file descriptor will not be closed when the
15284 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015285 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015286 * Returns the resulting document tree
15287 */
15288xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015289xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015290{
15291 xmlParserCtxtPtr ctxt;
15292 xmlParserInputBufferPtr input;
15293 xmlParserInputPtr stream;
15294
15295 if (fd < 0)
15296 return (NULL);
15297
15298 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15299 if (input == NULL)
15300 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015301 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015302 ctxt = xmlNewParserCtxt();
15303 if (ctxt == NULL) {
15304 xmlFreeParserInputBuffer(input);
15305 return (NULL);
15306 }
15307 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15308 if (stream == NULL) {
15309 xmlFreeParserInputBuffer(input);
15310 xmlFreeParserCtxt(ctxt);
15311 return (NULL);
15312 }
15313 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015314 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015315}
15316
15317/**
15318 * xmlReadIO:
15319 * @ioread: an I/O read function
15320 * @ioclose: an I/O close function
15321 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015322 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015323 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015324 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015325 *
15326 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015327 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015328 * Returns the resulting document tree
15329 */
15330xmlDocPtr
15331xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015332 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015333{
15334 xmlParserCtxtPtr ctxt;
15335 xmlParserInputBufferPtr input;
15336 xmlParserInputPtr stream;
15337
15338 if (ioread == NULL)
15339 return (NULL);
15340
15341 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15342 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015343 if (input == NULL) {
15344 if (ioclose != NULL)
15345 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015346 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015347 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015348 ctxt = xmlNewParserCtxt();
15349 if (ctxt == NULL) {
15350 xmlFreeParserInputBuffer(input);
15351 return (NULL);
15352 }
15353 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15354 if (stream == NULL) {
15355 xmlFreeParserInputBuffer(input);
15356 xmlFreeParserCtxt(ctxt);
15357 return (NULL);
15358 }
15359 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015360 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015361}
15362
15363/**
15364 * xmlCtxtReadDoc:
15365 * @ctxt: an XML parser context
15366 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015367 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015368 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015369 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015370 *
15371 * parse an XML in-memory document and build a tree.
15372 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015373 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015374 * Returns the resulting document tree
15375 */
15376xmlDocPtr
15377xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015378 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015379{
15380 xmlParserInputPtr stream;
15381
15382 if (cur == NULL)
15383 return (NULL);
15384 if (ctxt == NULL)
15385 return (NULL);
15386
15387 xmlCtxtReset(ctxt);
15388
15389 stream = xmlNewStringInputStream(ctxt, cur);
15390 if (stream == NULL) {
15391 return (NULL);
15392 }
15393 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015394 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015395}
15396
15397/**
15398 * xmlCtxtReadFile:
15399 * @ctxt: an XML parser context
15400 * @filename: a file or URL
15401 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015402 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015403 *
15404 * parse an XML file from the filesystem or the network.
15405 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015406 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015407 * Returns the resulting document tree
15408 */
15409xmlDocPtr
15410xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15411 const char *encoding, int options)
15412{
15413 xmlParserInputPtr stream;
15414
15415 if (filename == NULL)
15416 return (NULL);
15417 if (ctxt == NULL)
15418 return (NULL);
15419
15420 xmlCtxtReset(ctxt);
15421
Daniel Veillard29614c72004-11-26 10:47:26 +000015422 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015423 if (stream == NULL) {
15424 return (NULL);
15425 }
15426 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015427 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015428}
15429
15430/**
15431 * xmlCtxtReadMemory:
15432 * @ctxt: an XML parser context
15433 * @buffer: a pointer to a char array
15434 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015435 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015436 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015437 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015438 *
15439 * parse an XML in-memory document and build a tree.
15440 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015441 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015442 * Returns the resulting document tree
15443 */
15444xmlDocPtr
15445xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015446 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015447{
15448 xmlParserInputBufferPtr input;
15449 xmlParserInputPtr stream;
15450
15451 if (ctxt == NULL)
15452 return (NULL);
15453 if (buffer == NULL)
15454 return (NULL);
15455
15456 xmlCtxtReset(ctxt);
15457
15458 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15459 if (input == NULL) {
15460 return(NULL);
15461 }
15462
15463 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15464 if (stream == NULL) {
15465 xmlFreeParserInputBuffer(input);
15466 return(NULL);
15467 }
15468
15469 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015470 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015471}
15472
15473/**
15474 * xmlCtxtReadFd:
15475 * @ctxt: an XML parser context
15476 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015477 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015478 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015479 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015480 *
15481 * parse an XML from a file descriptor and build a tree.
15482 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015483 * NOTE that the file descriptor will not be closed when the
15484 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015485 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015486 * Returns the resulting document tree
15487 */
15488xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015489xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15490 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015491{
15492 xmlParserInputBufferPtr input;
15493 xmlParserInputPtr stream;
15494
15495 if (fd < 0)
15496 return (NULL);
15497 if (ctxt == NULL)
15498 return (NULL);
15499
15500 xmlCtxtReset(ctxt);
15501
15502
15503 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15504 if (input == NULL)
15505 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015506 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015507 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15508 if (stream == NULL) {
15509 xmlFreeParserInputBuffer(input);
15510 return (NULL);
15511 }
15512 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015513 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015514}
15515
15516/**
15517 * xmlCtxtReadIO:
15518 * @ctxt: an XML parser context
15519 * @ioread: an I/O read function
15520 * @ioclose: an I/O close function
15521 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015522 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015523 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015524 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015525 *
15526 * parse an XML document from I/O functions and source and build a tree.
15527 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015528 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015529 * Returns the resulting document tree
15530 */
15531xmlDocPtr
15532xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15533 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015534 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015535 const char *encoding, int options)
15536{
15537 xmlParserInputBufferPtr input;
15538 xmlParserInputPtr stream;
15539
15540 if (ioread == NULL)
15541 return (NULL);
15542 if (ctxt == NULL)
15543 return (NULL);
15544
15545 xmlCtxtReset(ctxt);
15546
15547 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15548 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015549 if (input == NULL) {
15550 if (ioclose != NULL)
15551 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015552 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015553 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015554 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15555 if (stream == NULL) {
15556 xmlFreeParserInputBuffer(input);
15557 return (NULL);
15558 }
15559 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015560 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015561}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015562
15563#define bottom_parser
15564#include "elfgcchack.h"