blob: 39d391b16c912e499f425177caaa42ecc2ab078f [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080020 * different ranges of character are actually implanted either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
103#define XML_PARSER_BIG_ENTITY 1000
104#define XML_PARSER_LOT_ENTITY 5000
105
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
112#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800125 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
135 return(0);
136
137 /*
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
141 */
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
145 }
146 consumed += ctxt->sizeentities;
147
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
149 return(0);
150 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000151 /*
152 * Do the check based on the replacement size of the entity
153 */
154 if (size < XML_PARSER_BIG_ENTITY)
155 return(0);
156
157 /*
158 * A limit on the amount of text data reasonably used
159 */
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
163 }
164 consumed += ctxt->sizeentities;
165
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168 return (0);
169 } else if (ent != NULL) {
170 /*
171 * use the number of parsed entities in the replacement
172 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800173 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000174
175 /*
176 * The amount of data parsed counting entities size only once
177 */
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
181 }
182 consumed += ctxt->sizeentities;
183
184 /*
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
187 */
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189 return (0);
190 } else {
191 /*
192 * strange we got no data for checking just return
193 */
194 return (0);
195 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 return (1);
198}
199
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we allow to
 * process, 256 by default. It is a safety boundary and not a limitation
 * of the parser itself; it can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard0fb18932003-09-07 09:14:37 +0000210
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000213#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000214#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000215#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
216
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, that is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
226#define XML_PARSER_CHUNK_SIZE 100
227
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is terminated by a NULL entry. Both the strings and the
 * array slots are const: this is a fixed lookup table, so nothing
 * should be able to repoint an entry by accident.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
237
Daniel Veillarda07050d2003-10-19 14:46:32 +0000238
Owen Taylor3473f882001-02-23 17:55:21 +0000239/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200240static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000242
Daniel Veillard7d515752003-09-26 19:12:37 +0000243static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000244xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000246 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000247 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000248
Daniel Veillard37334572008-07-31 08:20:02 +0000249static int
250xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000252#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000253static void
254xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000256#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000257
Daniel Veillard7d515752003-09-26 19:12:37 +0000258static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000259xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000261
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000262static int
263xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264
Daniel Veillarde57ec792003-09-10 10:50:59 +0000265/************************************************************************
266 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800267 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 * *
269 ************************************************************************/
270
271/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
276 *
277 * Handle a redefinition of attribute error
278 */
279static void
280xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
282{
Daniel Veillard157fee02003-10-31 10:36:03 +0000283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
285 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000286 if (ctxt != NULL)
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000289 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000294 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000300 if (ctxt != NULL) {
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
304 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305}
306
307/**
308 * xmlFatalErr:
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @extra: extra information string
312 *
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314 */
315static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317{
318 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800319 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320
Daniel Veillard157fee02003-10-31 10:36:03 +0000321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
323 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 switch (error) {
325 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800326 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800329 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800332 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "internal error";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800338 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800341 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800344 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800350 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800353 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800356 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "Fragment not allowed";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
441 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000492 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800495 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 if (info == NULL)
507 snprintf(errstr, 128, "%s\n", errmsg);
508 else
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000510 if (ctxt != NULL)
511 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000514 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL) {
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
519 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520}
521
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000522/**
523 * xmlFatalErrMsg:
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
527 *
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529 */
530static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000533{
Daniel Veillard157fee02003-10-31 10:36:03 +0000534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
536 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000537 if (ctxt != NULL)
538 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000546}
547
548/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000549 * xmlWarningMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 * @str2: extra data
555 *
556 * Handle a warning.
557 */
558static void
559xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000563
Daniel Veillard157fee02003-10-31 10:36:03 +0000564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000569 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200570 if (ctxt != NULL) {
571 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000572 (ctxt->sax) ? ctxt->sax->warning : NULL,
573 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200578 } else {
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
584 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585}
586
587/**
588 * xmlValidityError:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
592 * @str1: extra data
593 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000594 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595 */
596static void
597xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000598 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000599{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000600 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000601
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->errNo = error;
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
609 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200610 if (ctxt != NULL) {
611 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000612 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200618 } else {
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000624 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625}
626
627/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000628 * xmlFatalErrMsgInt:
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
633 *
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635 */
636static void
637xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000638 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000639{
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if (ctxt != NULL)
644 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000645 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000648 if (ctxt != NULL) {
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
652 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000653}
654
655/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: an string info
661 * @val: an integer value
662 * @str2: an string info
663 *
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 */
666static void
667xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800668 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000669 const xmlChar *str2)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000680 if (ctxt != NULL) {
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
684 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000685}
686
687/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000688 * xmlFatalErrMsgStr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
693 *
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 */
696static void
697xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000698 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000699{
Daniel Veillard157fee02003-10-31 10:36:03 +0000700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
702 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
713 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000714}
715
716/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000717 * xmlErrMsgStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
722 *
723 * Handle a non fatal parser error
724 */
725static void
726xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
728{
Daniel Veillard157fee02003-10-31 10:36:03 +0000729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
731 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000732 if (ctxt != NULL)
733 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737 val);
738}
739
740/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000741 * xmlNsErr:
742 * @ctxt: an XML parser context
743 * @error: the error number
744 * @msg: the message
745 * @info1: extra information string
746 * @info2: extra information string
747 *
748 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749 */
750static void
751xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000755{
Daniel Veillard157fee02003-10-31 10:36:03 +0000756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
758 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000759 if (ctxt != NULL)
760 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000765 if (ctxt != NULL)
766 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000767}
768
Daniel Veillard37334572008-07-31 08:20:02 +0000769/**
770 * xmlNsWarn
771 * @ctxt: an XML parser context
772 * @error: the error number
773 * @msg: the message
774 * @info1: extra information string
775 * @info2: extra information string
776 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800777 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000778 */
779static void
780xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const char *msg,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
784{
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
787 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
792}
793
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000794/************************************************************************
795 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800796 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797 * *
798 ************************************************************************/
799
800/**
801 * xmlHasFeature:
802 * @feature: the feature to be examined
803 *
804 * Examines if the library has been compiled with a given feature.
805 *
806 * Returns a non-zero value if the feature exist, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an unknown
808 * unknown feature is requested, non-zero otherwise.
809 */
810int
811xmlHasFeature(xmlFeature feature)
812{
813 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_THREAD_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_TREE_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_OUTPUT_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_PUSH_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_READER_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_PATTERN_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_WRITER_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_SAX1_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_FTP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_HTTP_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_VALID_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_HTML_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_LEGACY_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_C14N_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_CATALOG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_XPATH_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_XPTR_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_XINCLUDE_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_ICONV_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_ISO8859X_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_UNICODE_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_REGEXP_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_AUTOMATA_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_EXPR_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_SCHEMAS_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_SCHEMATRON_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_MODULES_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_DEBUG_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef DEBUG_MEMORY_LOCATION
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_DEBUG_RUNTIME
990 return(1);
991#else
992 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000993#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000994 case XML_WITH_ZLIB:
995#ifdef LIBXML_ZLIB_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001000 case XML_WITH_LZMA:
1001#ifdef LIBXML_LZMA_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 case XML_WITH_ICU:
1007#ifdef LIBXML_ICU_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012 default:
1013 break;
1014 }
1015 return(0);
1016}
1017
1018/************************************************************************
1019 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001020 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 * *
1022 ************************************************************************/
1023
1024/**
1025 * xmlDetectSAX2:
1026 * @ctxt: an XML parser context
1027 *
1028 * Do the SAX2 detection and specific intialization
1029 */
1030static void
1031xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001037#else
1038 ctxt->sax2 = 1;
1039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001046 xmlErrMemory(ctxt, NULL);
1047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048}
1049
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050typedef struct _xmlDefAttrs xmlDefAttrs;
1051typedef xmlDefAttrs *xmlDefAttrsPtr;
1052struct _xmlDefAttrs {
1053 int nbAttrs; /* number of defaulted attributes on that element */
1054 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001055 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057
1058/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001059 * xmlAttrNormalizeSpace:
1060 * @src: the source string
1061 * @dst: the target string
1062 *
1063 * Normalize the space in non CDATA attribute values:
1064 * If the attribute type is not CDATA, then the XML processor MUST further
1065 * process the normalized attribute value by discarding any leading and
1066 * trailing space (#x20) characters, and by replacing sequences of space
1067 * (#x20) characters by a single space (#x20) character.
1068 * Note that the size of dst need to be at least src, and if one doesn't need
1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070 * passing src as dst is just fine.
1071 *
1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073 * is needed.
1074 */
1075static xmlChar *
1076xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077{
1078 if ((src == NULL) || (dst == NULL))
1079 return(NULL);
1080
1081 while (*src == 0x20) src++;
1082 while (*src != 0) {
1083 if (*src == 0x20) {
1084 while (*src == 0x20) src++;
1085 if (*src != 0)
1086 *dst++ = 0x20;
1087 } else {
1088 *dst++ = *src++;
1089 }
1090 }
1091 *dst = 0;
1092 if (dst == src)
1093 return(NULL);
1094 return(dst);
1095}
1096
1097/**
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1100 *
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1104 *
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106 * is needed.
1107 */
1108static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001109xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001110{
1111 int i;
1112 int remove_head = 0;
1113 int need_realloc = 0;
1114 const xmlChar *cur;
1115
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117 return(NULL);
1118 i = *len;
1119 if (i <= 0)
1120 return(NULL);
1121
1122 cur = src;
1123 while (*cur == 0x20) {
1124 cur++;
1125 remove_head++;
1126 }
1127 while (*cur != 0) {
1128 if (*cur == 0x20) {
1129 cur++;
1130 if ((*cur == 0x20) || (*cur == 0)) {
1131 need_realloc = 1;
1132 break;
1133 }
1134 } else
1135 cur++;
1136 }
1137 if (need_realloc) {
1138 xmlChar *ret;
1139
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141 if (ret == NULL) {
1142 xmlErrMemory(ctxt, NULL);
1143 return(NULL);
1144 }
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1147 return(ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 memmove(src, src + remove_head, 1 + *len);
1151 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001152 }
1153 return(NULL);
1154}
1155
1156/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 * xmlAddDefAttrs:
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1162 *
1163 * Add a defaulted attribute for an element
1164 */
1165static void
1166xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1171 int len;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1174
Daniel Veillard6a31b832008-03-26 14:06:44 +00001175 /*
1176 * Allows to detect attribute redefinitions
1177 */
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180 return;
1181 }
1182
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 if (ctxt->attsDefault == NULL)
1186 goto mem_error;
1187 }
1188
1189 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 */
1193 name = xmlSplitQName3(fullname, &len);
1194 if (name == NULL) {
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1196 prefix = NULL;
1197 } else {
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200 }
1201
1202 /*
1203 * make sure there is some storage
1204 */
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 if (defaults == NULL)
1210 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001212 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1215 xmlFree(defaults);
1216 goto mem_error;
1217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001219 xmlDefAttrsPtr temp;
1220
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001223 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1229 xmlFree(defaults);
1230 goto mem_error;
1231 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 }
1233
1234 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001235 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001236 * are within the DTD and hen not associated to namespace names.
1237 */
1238 name = xmlSplitQName3(fullattr, &len);
1239 if (name == NULL) {
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241 prefix = NULL;
1242 } else {
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245 }
1246
Daniel Veillardae0765b2008-07-31 19:54:59 +00001247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254 if (ctxt->external)
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256 else
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 defaults->nbAttrs++;
1259
1260 return;
1261
1262mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001263 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 return;
1265}
1266
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001267/**
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1273 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001275 */
1276static void
1277xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1280 int type)
1281{
1282 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001284 if (ctxt->attsSpecial == NULL)
1285 goto mem_error;
1286 }
1287
Daniel Veillardac4118d2008-01-11 05:27:32 +00001288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289 return;
1290
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001293 return;
1294
1295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001297 return;
1298}
1299
Daniel Veillard4432df22003-09-28 18:58:27 +00001300/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001301 * xmlCleanSpecialAttrCallback:
1302 *
1303 * Removes CDATA attributes from the special attribute table
1304 */
1305static void
1306xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313 }
1314}
1315
1316/**
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1319 *
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1323 */
1324static void
1325xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326{
1327 if (ctxt->attsSpecial == NULL)
1328 return;
1329
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1335 }
1336 return;
1337}
1338
1339/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1342 *
1343 * Checks that the value conforms to the LanguageID production:
1344 *
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1347 *
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1354 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001355 * The current REC reference the sucessors of RFC 1766, currently 5646
1356 *
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1359 * ["-" script]
1360 * ["-" region]
1361 * *("-" variant)
1362 * *("-" extension)
1363 * ["-" privateuse]
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1369 *
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1372 *
1373 * script = 4ALPHA ; ISO 15924 code
1374 *
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1377 *
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1380 *
1381 * extension = singleton 1*("-" (2*8alphanum))
1382 *
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1386 * / %x41-57 ; A - W
1387 * / %x59-5A ; Y - Z
1388 * / %x61-77 ; a - w
1389 * / %x79-7A ; y - z
1390 *
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1394 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001395 * Returns 1 if correct 0 otherwise
1396 **/
1397int
1398xmlCheckLanguageID(const xmlChar * lang)
1399{
Daniel Veillard60587d62010-11-04 15:16:27 +01001400 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001401
1402 if (cur == NULL)
1403 return (0);
1404 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001405 ((cur[0] == 'I') && (cur[1] == '-')) ||
1406 ((cur[0] == 'x') && (cur[1] == '-')) ||
1407 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001408 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001409 * Still allow IANA code and user code which were coming
1410 * from the previous version of the XML-1.0 specification
1411 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001412 */
1413 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001415 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001417 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001418 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001419 nxt = cur;
1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422 nxt++;
1423 if (nxt - cur >= 4) {
1424 /*
1425 * Reserved
1426 */
1427 if ((nxt - cur > 8) || (nxt[0] != 0))
1428 return(0);
1429 return(1);
1430 }
1431 if (nxt - cur < 2)
1432 return(0);
1433 /* we got an ISO 639 code */
1434 if (nxt[0] == 0)
1435 return(1);
1436 if (nxt[0] != '-')
1437 return(0);
1438
1439 nxt++;
1440 cur = nxt;
1441 /* now we can have extlang or script or region or variant */
1442 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443 goto region_m49;
1444
1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447 nxt++;
1448 if (nxt - cur == 4)
1449 goto script;
1450 if (nxt - cur == 2)
1451 goto region;
1452 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453 goto variant;
1454 if (nxt - cur != 3)
1455 return(0);
1456 /* we parsed an extlang */
1457 if (nxt[0] == 0)
1458 return(1);
1459 if (nxt[0] != '-')
1460 return(0);
1461
1462 nxt++;
1463 cur = nxt;
1464 /* now we can have script or region or variant */
1465 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466 goto region_m49;
1467
1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470 nxt++;
1471 if (nxt - cur == 2)
1472 goto region;
1473 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474 goto variant;
1475 if (nxt - cur != 4)
1476 return(0);
1477 /* we parsed a script */
1478script:
1479 if (nxt[0] == 0)
1480 return(1);
1481 if (nxt[0] != '-')
1482 return(0);
1483
1484 nxt++;
1485 cur = nxt;
1486 /* now we can have region or variant */
1487 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488 goto region_m49;
1489
1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492 nxt++;
1493
1494 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495 goto variant;
1496 if (nxt - cur != 2)
1497 return(0);
1498 /* we parsed a region */
1499region:
1500 if (nxt[0] == 0)
1501 return(1);
1502 if (nxt[0] != '-')
1503 return(0);
1504
1505 nxt++;
1506 cur = nxt;
1507 /* now we can just have a variant */
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511
1512 if ((nxt - cur < 5) || (nxt - cur > 8))
1513 return(0);
1514
1515 /* we parsed a variant */
1516variant:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001522 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001523
1524region_m49:
1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527 nxt += 3;
1528 goto region;
1529 }
1530 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001531}
1532
Owen Taylor3473f882001-02-23 17:55:21 +00001533/************************************************************************
1534 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001535 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001536 * *
1537 ************************************************************************/
1538
Daniel Veillard8ed10722009-08-20 19:17:36 +02001539static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001541
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542#ifdef SAX2
1543/**
1544 * nsPush:
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1548 *
1549 * Pushes a new parser namespace on top of the ns stack
1550 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 */
1554static int
1555nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1558 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001560 if (ctxt->nsTab[i] == prefix) {
1561 /* in scope */
1562 if (ctxt->nsTab[i + 1] == URL)
1563 return(-2);
1564 /* out of scope keep it */
1565 break;
1566 }
1567 }
1568 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570 ctxt->nsMax = 10;
1571 ctxt->nsNr = 0;
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001576 ctxt->nsMax = 0;
1577 return (-1);
1578 }
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001580 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001586 ctxt->nsMax /= 2;
1587 return (-1);
1588 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001589 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 }
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1594}
1595/**
1596 * nsPop:
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1599 *
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1601 *
1602 * Returns the number of namespaces removed
1603 */
1604static int
1605nsPop(xmlParserCtxtPtr ctxt, int nr)
1606{
1607 int i;
1608
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612 nr = ctxt->nsNr;
1613 }
1614 if (ctxt->nsNr <= 0)
1615 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001616
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 for (i = 0;i < nr;i++) {
1618 ctxt->nsNr--;
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1620 }
1621 return(nr);
1622}
1623#endif
1624
1625static int
1626xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001628 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629 int maxatts;
1630
1631 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001645 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001646 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001651 ctxt->maxatts = maxatts;
1652 }
1653 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657}
1658
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659/**
1660 * inputPush:
1661 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001662 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 *
1664 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001666 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001667 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001668int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001671 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001672 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 if (ctxt->inputNr >= ctxt->inputMax) {
1674 ctxt->inputMax *= 2;
1675 ctxt->inputTab =
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677 ctxt->inputMax *
1678 sizeof(ctxt->inputTab[0]));
1679 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
1683 value = NULL;
1684 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001685 }
1686 }
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1690}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001693 * @ctxt: an XML parser context
1694 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001696 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001699xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700inputPop(xmlParserCtxtPtr ctxt)
1701{
1702 xmlParserInputPtr ret;
1703
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001704 if (ctxt == NULL)
1705 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001707 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 ctxt->inputNr--;
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711 else
1712 ctxt->input = NULL;
1713 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001714 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 return (ret);
1716}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001717/**
1718 * nodePush:
1719 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001720 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 *
1722 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001723 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001724 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001729 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001731 xmlNodePtr *tmp;
1732
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001736 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001738 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001739 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001740 ctxt->nodeTab = tmp;
1741 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001742 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001747 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001750 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001751 ctxt->nodeTab[ctxt->nodeNr] = value;
1752 ctxt->node = value;
1753 return (ctxt->nodeNr++);
1754}
Daniel Veillard8915c152008-08-26 13:05:34 +00001755
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756/**
1757 * nodePop:
1758 * @ctxt: an XML parser context
1759 *
1760 * Pops the top element node from the node stack
1761 *
1762 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001763 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001764xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765nodePop(xmlParserCtxtPtr ctxt)
1766{
1767 xmlNodePtr ret;
1768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001771 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 ctxt->nodeNr--;
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775 else
1776 ctxt->node = NULL;
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 return (ret);
1780}
Daniel Veillarda2351322004-06-27 12:08:10 +00001781
1782#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 * nameNsPush:
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1789 *
1790 * Pushes a new element name/prefix/URL on top of the name stack
1791 *
1792 * Returns -1 in case of error, the index in the stack otherwise
1793 */
1794static int
1795nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797{
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1800 void **tmp2;
1801 ctxt->nameMax *= 2;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803 ctxt->nameMax *
1804 sizeof(ctxt->nameTab[0]));
1805 if (tmp == NULL) {
1806 ctxt->nameMax /= 2;
1807 goto mem_error;
1808 }
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811 ctxt->nameMax * 3 *
1812 sizeof(ctxt->pushTab[0]));
1813 if (tmp2 == NULL) {
1814 ctxt->nameMax /= 2;
1815 goto mem_error;
1816 }
1817 ctxt->pushTab = tmp2;
1818 }
1819 ctxt->nameTab[ctxt->nameNr] = value;
1820 ctxt->name = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 return (ctxt->nameNr++);
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 return (-1);
1828}
1829/**
1830 * nameNsPop:
1831 * @ctxt: an XML parser context
1832 *
1833 * Pops the top element/prefix/URI name from the name stack
1834 *
1835 * Returns the name just removed
1836 */
1837static const xmlChar *
1838nameNsPop(xmlParserCtxtPtr ctxt)
1839{
1840 const xmlChar *ret;
1841
1842 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001843 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844 ctxt->nameNr--;
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847 else
1848 ctxt->name = NULL;
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1851 return (ret);
1852}
Daniel Veillarda2351322004-06-27 12:08:10 +00001853#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854
1855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001856 * namePush:
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1859 *
1860 * Pushes a new element name on top of the name stack
1861 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001865namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001867 if (ctxt == NULL) return (-1);
1868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001872 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001873 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001878 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001879 }
1880 ctxt->nameTab[ctxt->nameNr] = value;
1881 ctxt->name = value;
1882 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001884 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886}
1887/**
1888 * namePop:
1889 * @ctxt: an XML parser context
1890 *
1891 * Pops the top element name from the name stack
1892 *
1893 * Returns the name just removed
1894 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001895const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896namePop(xmlParserCtxtPtr ctxt)
1897{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001898 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001899
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 ctxt->nameNr--;
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905 else
1906 ctxt->name = NULL;
1907 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001908 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 return (ret);
1910}
Owen Taylor3473f882001-02-23 17:55:21 +00001911
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001912static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001914 int *tmp;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001921 ctxt->spaceMax /=2;
1922 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001924 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1929}
1930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001931static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 int ret;
1933 if (ctxt->spaceNr <= 0) return(0);
1934 ctxt->spaceNr--;
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001938 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1941 return(ret);
1942}
1943
1944/*
1945 * Macros for accessing the content. Those should be used only by the parser,
1946 * and not exported.
1947 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
1950 *
1951 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1952 * To be used with extreme caution since operations consuming
1953 * characters may move the input buffer to a different location !
1954 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
1955 * This should be used internally by the parser
1956 * only to compare to ASCII values otherwise it would break when
1957 * running with UTF-8 encoding.
1958 * RAW same as CUR but in the input buffer, bypass any token
1959 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
1962 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001963 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001966 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1967 *
1968 * NEXT Skip to the next character, this does the proper decoding
1969 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001970 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001971 * CUR_CHAR(l) returns the current unicode character (int), set l
1972 * to the number of xmlChars used for the encoding [0-5].
1973 * CUR_SCHAR same but operate on a string instead of the context
1974 * COPY_BUF copy the current unicode char to the target buffer, increment
1975 * the index
1976 * GROW, SHRINK handling of input buffers
1977 */
1978
/*
 * Raw accessors to the current input. They all expand to expressions
 * using a variable named ctxt which has to be in scope at the point
 * of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the n first bytes found at s, read
 * as unsigned char, are equal to c1 ... cn in that order. The test
 * stops at the first mismatch. s is evaluated once per byte compared,
 * so it must not have side effects; it is parenthesized so that any
 * pointer expression can be passed.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && \
    ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && \
    ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )
2001
/*
 * SKIP(val): moves the current input forward by val xmlChars and adds
 * val to the column and to ctxt->nbChars; the line counter is not
 * touched. Afterwards a '%' at the new position is handed over to
 * xmlParserHandlePEReference(), and the input is popped when it is
 * exhausted and cannot be grown. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP() but walks one xmlChar at a time so that
 * line and column stay right when newlines are skipped. val is
 * evaluated on every loop test and is parenthesized so that any
 * expression can be passed.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2024
Daniel Veillarda880b122003-04-21 21:36:41 +00002025#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002028 xmlSHRINK (ctxt);
2029
2030static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 }
Owen Taylor3473f882001-02-23 17:55:21 +00002036
Daniel Veillarda880b122003-04-21 21:36:41 +00002037#define GROW if ((ctxt->progressive == 0) && \
2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002039 xmlGROW (ctxt);
2040
2041static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002042 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2043 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2044
2045 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2046 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002047 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002048 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2049 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002050 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002051 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002052 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002053 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002054 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2055 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002056}
Owen Taylor3473f882001-02-23 17:55:21 +00002057
/* shorthands for the functions doing the actual work on ctxt */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: steps over 1 xmlChar, bumping the column and ctxt->nbChars
 * but not the line, and asks for more input when a 0 byte is reached.
 * It expands to a plain { } block, not to a single statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): steps over the current character which is l xmlChars long,
 * updating line and column, then hands a '%' found at the new position
 * over to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l has to be an lvalue: its address is passed to receive the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): stores the character v at b[i] and moves i past
 * it, as a single xmlChar when l is 1 and through
 * xmlCopyCharMultiByte() otherwise. i has to be an lvalue. The
 * expansion is a bare if/else, so it must not be used as the unbraced
 * body of an outer if which has an else of its own.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[i++] = (xmlChar) (v);				\
    else i += xmlCopyCharMultiByte(&(b)[i],(v))
2085/**
2086 * xmlSkipBlankChars:
2087 * @ctxt: the XML parser context
2088 *
2089 * skip all blanks character found at that point in the input streams.
2090 * It pops up finished entities in the process if allowable at that point.
2091 *
2092 * Returns the number of space chars skipped
2093 */
2094
2095int
2096xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002097 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002098
2099 /*
2100 * It's Okay to use CUR/NEXT here since all the blanks are on
2101 * the ASCII range.
2102 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002103 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2104 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002105 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002106 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002107 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002108 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002109 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002110 if (*cur == '\n') {
2111 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002112 } else {
2113 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002114 }
2115 cur++;
2116 res++;
2117 if (*cur == 0) {
2118 ctxt->input->cur = cur;
2119 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2120 cur = ctxt->input->cur;
2121 }
2122 }
2123 ctxt->input->cur = cur;
2124 } else {
2125 int cur;
2126 do {
2127 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002128 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002129 NEXT;
2130 cur = CUR;
2131 res++;
2132 }
2133 while ((cur == 0) && (ctxt->inputNr > 1) &&
2134 (ctxt->instate != XML_PARSER_COMMENT)) {
2135 xmlPopInput(ctxt);
2136 cur = CUR;
2137 }
2138 /*
2139 * Need to handle support of entities branching here
2140 */
2141 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2142 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2143 }
Owen Taylor3473f882001-02-23 17:55:21 +00002144 return(res);
2145}
2146
2147/************************************************************************
2148 * *
2149 * Commodity functions to handle entities *
2150 * *
2151 ************************************************************************/
2152
2153/**
2154 * xmlPopInput:
2155 * @ctxt: an XML parser context
2156 *
2157 * xmlPopInput: the current input pointed by ctxt->input came to an end
2158 * pop it and return the next char.
2159 *
2160 * Returns the current xmlChar in the parser context
2161 */
2162xmlChar
2163xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002164 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002165 if (xmlParserDebugEntities)
2166 xmlGenericError(xmlGenericErrorContext,
2167 "Popping input %d\n", ctxt->inputNr);
2168 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002169 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002170 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2171 return(xmlPopInput(ctxt));
2172 return(CUR);
2173}
2174
2175/**
2176 * xmlPushInput:
2177 * @ctxt: an XML parser context
2178 * @input: an XML parser input fragment (entity, XML fragment ...).
2179 *
2180 * xmlPushInput: switch to a new input stream which is stacked on top
2181 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002182 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002183 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002184int
Owen Taylor3473f882001-02-23 17:55:21 +00002185xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002186 int ret;
2187 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002188
2189 if (xmlParserDebugEntities) {
2190 if ((ctxt->input != NULL) && (ctxt->input->filename))
2191 xmlGenericError(xmlGenericErrorContext,
2192 "%s(%d): ", ctxt->input->filename,
2193 ctxt->input->line);
2194 xmlGenericError(xmlGenericErrorContext,
2195 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2196 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002197 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002198 if (ctxt->instate == XML_PARSER_EOF)
2199 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002200 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002201 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002202}
2203
2204/**
2205 * xmlParseCharRef:
2206 * @ctxt: an XML parser context
2207 *
2208 * parse Reference declarations
2209 *
2210 * [66] CharRef ::= '&#' [0-9]+ ';' |
2211 * '&#x' [0-9a-fA-F]+ ';'
2212 *
2213 * [ WFC: Legal Character ]
2214 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002215 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002216 *
2217 * Returns the value parsed (as an int), 0 in case of error
2218 */
2219int
2220xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002221 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002222 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002223 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002224
Owen Taylor3473f882001-02-23 17:55:21 +00002225 /*
2226 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2227 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002228 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002229 (NXT(2) == 'x')) {
2230 SKIP(3);
2231 GROW;
2232 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002233 if (count++ > 20) {
2234 count = 0;
2235 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002236 if (ctxt->instate == XML_PARSER_EOF)
2237 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002238 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002239 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002240 val = val * 16 + (CUR - '0');
2241 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2242 val = val * 16 + (CUR - 'a') + 10;
2243 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2244 val = val * 16 + (CUR - 'A') + 10;
2245 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002246 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002247 val = 0;
2248 break;
2249 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002250 if (val > 0x10FFFF)
2251 outofrange = val;
2252
Owen Taylor3473f882001-02-23 17:55:21 +00002253 NEXT;
2254 count++;
2255 }
2256 if (RAW == ';') {
2257 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002258 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002259 ctxt->nbChars ++;
2260 ctxt->input->cur++;
2261 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002262 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002263 SKIP(2);
2264 GROW;
2265 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002266 if (count++ > 20) {
2267 count = 0;
2268 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002269 if (ctxt->instate == XML_PARSER_EOF)
2270 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002271 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002272 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002273 val = val * 10 + (CUR - '0');
2274 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002275 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002276 val = 0;
2277 break;
2278 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002279 if (val > 0x10FFFF)
2280 outofrange = val;
2281
Owen Taylor3473f882001-02-23 17:55:21 +00002282 NEXT;
2283 count++;
2284 }
2285 if (RAW == ';') {
2286 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002287 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002288 ctxt->nbChars ++;
2289 ctxt->input->cur++;
2290 }
2291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 }
2294
2295 /*
2296 * [ WFC: Legal Character ]
2297 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002298 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002299 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002300 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002301 return(val);
2302 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002303 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2304 "xmlParseCharRef: invalid xmlChar value %d\n",
2305 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002306 }
2307 return(0);
2308}
2309
2310/**
2311 * xmlParseStringCharRef:
2312 * @ctxt: an XML parser context
2313 * @str: a pointer to an index in the string
2314 *
2315 * parse Reference declarations, variant parsing from a string rather
2316 * than an an input flow.
2317 *
2318 * [66] CharRef ::= '&#' [0-9]+ ';' |
2319 * '&#x' [0-9a-fA-F]+ ';'
2320 *
2321 * [ WFC: Legal Character ]
2322 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002323 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002324 *
2325 * Returns the value parsed (as an int), 0 in case of error, str will be
2326 * updated to the current value of the index
2327 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002328static int
Owen Taylor3473f882001-02-23 17:55:21 +00002329xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2330 const xmlChar *ptr;
2331 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002332 unsigned int val = 0;
2333 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002334
2335 if ((str == NULL) || (*str == NULL)) return(0);
2336 ptr = *str;
2337 cur = *ptr;
2338 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2339 ptr += 3;
2340 cur = *ptr;
2341 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002342 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002343 val = val * 16 + (cur - '0');
2344 else if ((cur >= 'a') && (cur <= 'f'))
2345 val = val * 16 + (cur - 'a') + 10;
2346 else if ((cur >= 'A') && (cur <= 'F'))
2347 val = val * 16 + (cur - 'A') + 10;
2348 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002349 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002350 val = 0;
2351 break;
2352 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002353 if (val > 0x10FFFF)
2354 outofrange = val;
2355
Owen Taylor3473f882001-02-23 17:55:21 +00002356 ptr++;
2357 cur = *ptr;
2358 }
2359 if (cur == ';')
2360 ptr++;
2361 } else if ((cur == '&') && (ptr[1] == '#')){
2362 ptr += 2;
2363 cur = *ptr;
2364 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002365 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002366 val = val * 10 + (cur - '0');
2367 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002368 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002369 val = 0;
2370 break;
2371 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002372 if (val > 0x10FFFF)
2373 outofrange = val;
2374
Owen Taylor3473f882001-02-23 17:55:21 +00002375 ptr++;
2376 cur = *ptr;
2377 }
2378 if (cur == ';')
2379 ptr++;
2380 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002381 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002382 return(0);
2383 }
2384 *str = ptr;
2385
2386 /*
2387 * [ WFC: Legal Character ]
2388 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002389 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002390 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002391 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002392 return(val);
2393 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002394 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2395 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2396 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002397 }
2398 return(0);
2399}
2400
2401/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002402 * xmlNewBlanksWrapperInputStream:
2403 * @ctxt: an XML parser context
2404 * @entity: an Entity pointer
2405 *
2406 * Create a new input stream for wrapping
2407 * blanks around a PEReference
2408 *
2409 * Returns the new input stream or NULL
2410 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002411
Daniel Veillardf5582f12002-06-11 10:08:16 +00002412static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002413
Daniel Veillardf4862f02002-09-10 11:13:43 +00002414static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002415xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2416 xmlParserInputPtr input;
2417 xmlChar *buffer;
2418 size_t length;
2419 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002420 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2421 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002422 return(NULL);
2423 }
2424 if (xmlParserDebugEntities)
2425 xmlGenericError(xmlGenericErrorContext,
2426 "new blanks wrapper for entity: %s\n", entity->name);
2427 input = xmlNewInputStream(ctxt);
2428 if (input == NULL) {
2429 return(NULL);
2430 }
2431 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002432 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002433 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002434 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002435 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002436 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002437 }
2438 buffer [0] = ' ';
2439 buffer [1] = '%';
2440 buffer [length-3] = ';';
2441 buffer [length-2] = ' ';
2442 buffer [length-1] = 0;
2443 memcpy(buffer + 2, entity->name, length - 5);
2444 input->free = deallocblankswrapper;
2445 input->base = buffer;
2446 input->cur = buffer;
2447 input->length = length;
2448 input->end = &buffer[length];
2449 return(input);
2450}
2451
2452/**
Owen Taylor3473f882001-02-23 17:55:21 +00002453 * xmlParserHandlePEReference:
2454 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002455 *
Owen Taylor3473f882001-02-23 17:55:21 +00002456 * [69] PEReference ::= '%' Name ';'
2457 *
2458 * [ WFC: No Recursion ]
2459 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002460 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002461 *
2462 * [ WFC: Entity Declared ]
2463 * In a document without any DTD, a document with only an internal DTD
2464 * subset which contains no parameter entity references, or a document
2465 * with "standalone='yes'", ... ... The declaration of a parameter
2466 * entity must precede any reference to it...
2467 *
2468 * [ VC: Entity Declared ]
2469 * In a document with an external subset or external parameter entities
2470 * with "standalone='no'", ... ... The declaration of a parameter entity
2471 * must precede any reference to it...
2472 *
2473 * [ WFC: In DTD ]
2474 * Parameter-entity references may only appear in the DTD.
2475 * NOTE: misleading but this is handled.
2476 *
2477 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002478 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002479 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002480 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002481 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002482 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002483 */
2484void
2485xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002486 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002487 xmlEntityPtr entity = NULL;
2488 xmlParserInputPtr input;
2489
Owen Taylor3473f882001-02-23 17:55:21 +00002490 if (RAW != '%') return;
2491 switch(ctxt->instate) {
2492 case XML_PARSER_CDATA_SECTION:
2493 return;
2494 case XML_PARSER_COMMENT:
2495 return;
2496 case XML_PARSER_START_TAG:
2497 return;
2498 case XML_PARSER_END_TAG:
2499 return;
2500 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002501 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002502 return;
2503 case XML_PARSER_PROLOG:
2504 case XML_PARSER_START:
2505 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002506 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002507 return;
2508 case XML_PARSER_ENTITY_DECL:
2509 case XML_PARSER_CONTENT:
2510 case XML_PARSER_ATTRIBUTE_VALUE:
2511 case XML_PARSER_PI:
2512 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002513 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002514 /* we just ignore it there */
2515 return;
2516 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002517 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002518 return;
2519 case XML_PARSER_ENTITY_VALUE:
2520 /*
2521 * NOTE: in the case of entity values, we don't do the
2522 * substitution here since we need the literal
2523 * entity value to be able to save the internal
2524 * subset of the document.
2525 * This will be handled by xmlStringDecodeEntities
2526 */
2527 return;
2528 case XML_PARSER_DTD:
2529 /*
2530 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2531 * In the internal DTD subset, parameter-entity references
2532 * can occur only where markup declarations can occur, not
2533 * within markup declarations.
2534 * In that case this is handled in xmlParseMarkupDecl
2535 */
2536 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2537 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002538 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002539 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002540 break;
2541 case XML_PARSER_IGNORE:
2542 return;
2543 }
2544
2545 NEXT;
2546 name = xmlParseName(ctxt);
2547 if (xmlParserDebugEntities)
2548 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002549 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002550 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002551 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002552 } else {
2553 if (RAW == ';') {
2554 NEXT;
2555 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2556 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002557 if (ctxt->instate == XML_PARSER_EOF)
2558 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002559 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002560
Owen Taylor3473f882001-02-23 17:55:21 +00002561 /*
2562 * [ WFC: Entity Declared ]
2563 * In a document without any DTD, a document with only an
2564 * internal DTD subset which contains no parameter entity
2565 * references, or a document with "standalone='yes'", ...
2566 * ... The declaration of a parameter entity must precede
2567 * any reference to it...
2568 */
2569 if ((ctxt->standalone == 1) ||
2570 ((ctxt->hasExternalSubset == 0) &&
2571 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002572 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002573 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002574 } else {
2575 /*
2576 * [ VC: Entity Declared ]
2577 * In a document with an external subset or external
2578 * parameter entities with "standalone='no'", ...
2579 * ... The declaration of a parameter entity must precede
2580 * any reference to it...
2581 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002582 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2583 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2584 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002585 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002586 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002587 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2588 "PEReference: %%%s; not found\n",
2589 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002590 ctxt->valid = 0;
2591 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002592 } else if (ctxt->input->free != deallocblankswrapper) {
2593 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002594 if (xmlPushInput(ctxt, input) < 0)
2595 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002596 } else {
2597 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2598 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002599 xmlChar start[4];
2600 xmlCharEncoding enc;
2601
Owen Taylor3473f882001-02-23 17:55:21 +00002602 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002603 * Note: external parameter entities will not be loaded, it
2604 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002605 * option of validating, or substituting entities were
2606 * given. Doing so is far more secure as the parser will
2607 * only process data coming from the document entity by
2608 * default.
2609 */
2610 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2611 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2612 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002613 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2614 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2615 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002616 (ctxt->validate == 0))
2617 return;
2618
2619 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002620 * handle the extra spaces added before and after
2621 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002622 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002623 */
2624 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002625 if (xmlPushInput(ctxt, input) < 0)
2626 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002627
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002628 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002629 * Get the 4 first bytes and decode the charset
2630 * if enc != XML_CHAR_ENCODING_NONE
2631 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002632 * Note that, since we may have some non-UTF8
2633 * encoding (like UTF16, bug 135229), the 'length'
2634 * is not known, but we can calculate based upon
2635 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002636 */
2637 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002638 if (ctxt->instate == XML_PARSER_EOF)
2639 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002640 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002641 start[0] = RAW;
2642 start[1] = NXT(1);
2643 start[2] = NXT(2);
2644 start[3] = NXT(3);
2645 enc = xmlDetectCharEncoding(start, 4);
2646 if (enc != XML_CHAR_ENCODING_NONE) {
2647 xmlSwitchEncoding(ctxt, enc);
2648 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002649 }
2650
Owen Taylor3473f882001-02-23 17:55:21 +00002651 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002652 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2653 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002654 xmlParseTextDecl(ctxt);
2655 }
Owen Taylor3473f882001-02-23 17:55:21 +00002656 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002657 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2658 "PEReference: %s is not a parameter entity\n",
2659 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002660 }
2661 }
2662 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002663 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002664 }
Owen Taylor3473f882001-02-23 17:55:21 +00002665 }
2666}
2667
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus @n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures, it is
 *            also the target when the size computation wraps around.
 *            On failure buffer and buffer##_size are left untouched.
 * @n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2682
2683/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002684 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002685 * @ctxt: the parser context
2686 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002687 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002688 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2689 * @end: an end marker xmlChar, 0 if none
2690 * @end2: an end marker xmlChar, 0 if none
2691 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002692 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002693 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002694 *
2695 * [67] Reference ::= EntityRef | CharRef
2696 *
2697 * [69] PEReference ::= '%' Name ';'
2698 *
2699 * Returns A newly allocated string with the substitution done. The caller
2700 * must deallocate it !
2701 */
2702xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002703xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2704 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002705 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002706 size_t buffer_size = 0;
2707 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002708
2709 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002710 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002711 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002712 xmlEntityPtr ent;
2713 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002714
Daniel Veillarda82b1822004-11-08 16:24:57 +00002715 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002716 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002717 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002718
Daniel Veillard0161e632008-08-28 15:36:32 +00002719 if (((ctxt->depth > 40) &&
2720 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2721 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002722 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002723 return(NULL);
2724 }
2725
2726 /*
2727 * allocate a translation buffer.
2728 */
2729 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002730 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002731 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002732
2733 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002734 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002735 * we are operating on already parsed values.
2736 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002737 if (str < last)
2738 c = CUR_SCHAR(str, l);
2739 else
2740 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002741 while ((c != 0) && (c != end) && /* non input consuming loop */
2742 (c != end2) && (c != end3)) {
2743
2744 if (c == 0) break;
2745 if ((c == '&') && (str[1] == '#')) {
2746 int val = xmlParseStringCharRef(ctxt, &str);
2747 if (val != 0) {
2748 COPY_BUF(0,buffer,nbchars,val);
2749 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002750 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002751 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002752 }
Owen Taylor3473f882001-02-23 17:55:21 +00002753 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2754 if (xmlParserDebugEntities)
2755 xmlGenericError(xmlGenericErrorContext,
2756 "String decoding Entity Reference: %.30s\n",
2757 str);
2758 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002759 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2760 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002761 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002762 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002763 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002764 if ((ent != NULL) &&
2765 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2766 if (ent->content != NULL) {
2767 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002768 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002769 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002770 }
Owen Taylor3473f882001-02-23 17:55:21 +00002771 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002772 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2773 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002774 }
2775 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002776 ctxt->depth++;
2777 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2778 0, 0, 0);
2779 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002780
Owen Taylor3473f882001-02-23 17:55:21 +00002781 if (rep != NULL) {
2782 current = rep;
2783 while (*current != 0) { /* non input consuming loop */
2784 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002785 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002786 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002787 goto int_error;
2788 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002789 }
2790 }
2791 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002792 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002793 }
2794 } else if (ent != NULL) {
2795 int i = xmlStrlen(ent->name);
2796 const xmlChar *cur = ent->name;
2797
2798 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002799 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002800 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002801 }
2802 for (;i > 0;i--)
2803 buffer[nbchars++] = *cur++;
2804 buffer[nbchars++] = ';';
2805 }
2806 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2807 if (xmlParserDebugEntities)
2808 xmlGenericError(xmlGenericErrorContext,
2809 "String decoding PE Reference: %.30s\n", str);
2810 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002811 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2812 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002813 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002814 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002816 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002817 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002818 }
Owen Taylor3473f882001-02-23 17:55:21 +00002819 ctxt->depth++;
2820 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2821 0, 0, 0);
2822 ctxt->depth--;
2823 if (rep != NULL) {
2824 current = rep;
2825 while (*current != 0) { /* non input consuming loop */
2826 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002827 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002828 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002829 goto int_error;
2830 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002831 }
2832 }
2833 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002834 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002835 }
2836 }
2837 } else {
2838 COPY_BUF(l,buffer,nbchars,c);
2839 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002840 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2841 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002842 }
2843 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002844 if (str < last)
2845 c = CUR_SCHAR(str, l);
2846 else
2847 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002848 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002849 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002850 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002851
2852mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002853 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002854int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002855 if (rep != NULL)
2856 xmlFree(rep);
2857 if (buffer != NULL)
2858 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002859 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002860}
2861
Daniel Veillarde57ec792003-09-10 10:50:59 +00002862/**
2863 * xmlStringDecodeEntities:
2864 * @ctxt: the parser context
2865 * @str: the input string
2866 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2867 * @end: an end marker xmlChar, 0 if none
2868 * @end2: an end marker xmlChar, 0 if none
2869 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002870 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002871 * Takes a entity string content and process to do the adequate substitutions.
2872 *
2873 * [67] Reference ::= EntityRef | CharRef
2874 *
2875 * [69] PEReference ::= '%' Name ';'
2876 *
2877 * Returns A newly allocated string with the substitution done. The caller
2878 * must deallocate it !
2879 */
2880xmlChar *
2881xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2882 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002883 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002884 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2885 end, end2, end3));
2886}
Owen Taylor3473f882001-02-23 17:55:21 +00002887
2888/************************************************************************
2889 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002890 * Commodity functions, cleanup needed ? *
2891 * *
2892 ************************************************************************/
2893
2894/**
2895 * areBlanks:
2896 * @ctxt: an XML parser context
2897 * @str: a xmlChar *
2898 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002899 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002900 *
2901 * Is this a sequence of blank chars that one can ignore ?
2902 *
2903 * Returns 1 if ignorable 0 otherwise.
2904 */
2905
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002906static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2907 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002908 int i, ret;
2909 xmlNodePtr lastChild;
2910
Daniel Veillard05c13a22001-09-09 08:38:09 +00002911 /*
2912 * Don't spend time trying to differentiate them, the same callback is
2913 * used !
2914 */
2915 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002916 return(0);
2917
Owen Taylor3473f882001-02-23 17:55:21 +00002918 /*
2919 * Check for xml:space value.
2920 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002921 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2922 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(0);
2924
2925 /*
2926 * Check that the string is made of blanks
2927 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002928 if (blank_chars == 0) {
2929 for (i = 0;i < len;i++)
2930 if (!(IS_BLANK_CH(str[i]))) return(0);
2931 }
Owen Taylor3473f882001-02-23 17:55:21 +00002932
2933 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002934 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002935 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002936 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002937 if (ctxt->myDoc != NULL) {
2938 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2939 if (ret == 0) return(1);
2940 if (ret == 1) return(0);
2941 }
2942
2943 /*
2944 * Otherwise, heuristic :-\
2945 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002946 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002947 if ((ctxt->node->children == NULL) &&
2948 (RAW == '<') && (NXT(1) == '/')) return(0);
2949
2950 lastChild = xmlGetLastChild(ctxt->node);
2951 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002952 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2953 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 } else if (xmlNodeIsText(lastChild))
2955 return(0);
2956 else if ((ctxt->node->children != NULL) &&
2957 (xmlNodeIsText(ctxt->node->children)))
2958 return(0);
2959 return(1);
2960}
2961
Owen Taylor3473f882001-02-23 17:55:21 +00002962/************************************************************************
2963 * *
2964 * Extra stuff for namespace support *
2965 * Relates to http://www.w3.org/TR/WD-xml-names *
2966 * *
2967 ************************************************************************/
2968
2969/**
2970 * xmlSplitQName:
2971 * @ctxt: an XML parser context
2972 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002973 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002974 *
2975 * parse an UTF8 encoded XML qualified name string
2976 *
2977 * [NS 5] QName ::= (Prefix ':')? LocalPart
2978 *
2979 * [NS 6] Prefix ::= NCName
2980 *
2981 * [NS 7] LocalPart ::= NCName
2982 *
2983 * Returns the local part, and prefix is updated
2984 * to get the Prefix if any.
2985 */
2986
2987xmlChar *
2988xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2989 xmlChar buf[XML_MAX_NAMELEN + 5];
2990 xmlChar *buffer = NULL;
2991 int len = 0;
2992 int max = XML_MAX_NAMELEN;
2993 xmlChar *ret = NULL;
2994 const xmlChar *cur = name;
2995 int c;
2996
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002997 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002998 *prefix = NULL;
2999
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00003000 if (cur == NULL) return(NULL);
3001
Owen Taylor3473f882001-02-23 17:55:21 +00003002#ifndef XML_XML_NAMESPACE
3003 /* xml: prefix is not really a namespace */
3004 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3005 (cur[2] == 'l') && (cur[3] == ':'))
3006 return(xmlStrdup(name));
3007#endif
3008
Daniel Veillard597bc482003-07-24 16:08:28 +00003009 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00003010 if (cur[0] == ':')
3011 return(xmlStrdup(name));
3012
3013 c = *cur++;
3014 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3015 buf[len++] = c;
3016 c = *cur++;
3017 }
3018 if (len >= max) {
3019 /*
3020 * Okay someone managed to make a huge name, so he's ready to pay
3021 * for the processing speed.
3022 */
3023 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003024
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003025 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003026 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003027 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003028 return(NULL);
3029 }
3030 memcpy(buffer, buf, len);
3031 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3032 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003033 xmlChar *tmp;
3034
Owen Taylor3473f882001-02-23 17:55:21 +00003035 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003036 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003037 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003039 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003040 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003041 return(NULL);
3042 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003043 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003044 }
3045 buffer[len++] = c;
3046 c = *cur++;
3047 }
3048 buffer[len] = 0;
3049 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003050
Daniel Veillard597bc482003-07-24 16:08:28 +00003051 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003052 if (buffer != NULL)
3053 xmlFree(buffer);
3054 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003055 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003056 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003057
Owen Taylor3473f882001-02-23 17:55:21 +00003058 if (buffer == NULL)
3059 ret = xmlStrndup(buf, len);
3060 else {
3061 ret = buffer;
3062 buffer = NULL;
3063 max = XML_MAX_NAMELEN;
3064 }
3065
3066
3067 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003068 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003069 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003070 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003071 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003072 }
Owen Taylor3473f882001-02-23 17:55:21 +00003073 len = 0;
3074
Daniel Veillardbb284f42002-10-16 18:02:47 +00003075 /*
3076 * Check that the first character is proper to start
3077 * a new name
3078 */
3079 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3080 ((c >= 0x41) && (c <= 0x5A)) ||
3081 (c == '_') || (c == ':'))) {
3082 int l;
3083 int first = CUR_SCHAR(cur, l);
3084
3085 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003086 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003087 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003088 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003089 }
3090 }
3091 cur++;
3092
Owen Taylor3473f882001-02-23 17:55:21 +00003093 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3094 buf[len++] = c;
3095 c = *cur++;
3096 }
3097 if (len >= max) {
3098 /*
3099 * Okay someone managed to make a huge name, so he's ready to pay
3100 * for the processing speed.
3101 */
3102 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003103
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003104 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003105 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003106 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003107 return(NULL);
3108 }
3109 memcpy(buffer, buf, len);
3110 while (c != 0) { /* tested bigname2.xml */
3111 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003112 xmlChar *tmp;
3113
Owen Taylor3473f882001-02-23 17:55:21 +00003114 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003115 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003116 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003117 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003118 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003119 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003120 return(NULL);
3121 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003122 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003123 }
3124 buffer[len++] = c;
3125 c = *cur++;
3126 }
3127 buffer[len] = 0;
3128 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003129
Owen Taylor3473f882001-02-23 17:55:21 +00003130 if (buffer == NULL)
3131 ret = xmlStrndup(buf, len);
3132 else {
3133 ret = buffer;
3134 }
3135 }
3136
3137 return(ret);
3138}
3139
3140/************************************************************************
3141 * *
3142 * The parser itself *
3143 * Relates to http://www.w3.org/TR/REC-xml *
3144 * *
3145 ************************************************************************/
3146
Daniel Veillard34e3f642008-07-29 09:02:27 +00003147/************************************************************************
3148 * *
3149 * Routines to parse Name, NCName and NmToken *
3150 * *
3151 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. Each routine increments its own counter on entry.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif

3160
Daniel Veillard34e3f642008-07-29 09:02:27 +00003161/*
3162 * The two following functions are related to the change of accepted
3163 * characters for Name and NmToken in the Revision 5 of XML-1.0
3164 * They correspond to the modified production [4] and the new production [4a]
3165 * changes in that revision. Also note that the macros used for the
3166 * productions Letter, Digit, CombiningChar and Extender are not needed
3167 * anymore.
3168 * We still keep compatibility to pre-revision5 parsing semantic if the
3169 * new XML_PARSE_OLD10 option is given to the parser.
3170 */
3171static int
3172xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3173 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3174 /*
3175 * Use the new checks of production [4] [4a] amd [5] of the
3176 * Update 5 of XML-1.0
3177 */
3178 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3179 (((c >= 'a') && (c <= 'z')) ||
3180 ((c >= 'A') && (c <= 'Z')) ||
3181 (c == '_') || (c == ':') ||
3182 ((c >= 0xC0) && (c <= 0xD6)) ||
3183 ((c >= 0xD8) && (c <= 0xF6)) ||
3184 ((c >= 0xF8) && (c <= 0x2FF)) ||
3185 ((c >= 0x370) && (c <= 0x37D)) ||
3186 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3187 ((c >= 0x200C) && (c <= 0x200D)) ||
3188 ((c >= 0x2070) && (c <= 0x218F)) ||
3189 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3190 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3191 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3192 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3193 ((c >= 0x10000) && (c <= 0xEFFFF))))
3194 return(1);
3195 } else {
3196 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3197 return(1);
3198 }
3199 return(0);
3200}
3201
3202static int
3203xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3204 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3205 /*
3206 * Use the new checks of production [4] [4a] amd [5] of the
3207 * Update 5 of XML-1.0
3208 */
3209 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3210 (((c >= 'a') && (c <= 'z')) ||
3211 ((c >= 'A') && (c <= 'Z')) ||
3212 ((c >= '0') && (c <= '9')) || /* !start */
3213 (c == '_') || (c == ':') ||
3214 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3215 ((c >= 0xC0) && (c <= 0xD6)) ||
3216 ((c >= 0xD8) && (c <= 0xF6)) ||
3217 ((c >= 0xF8) && (c <= 0x2FF)) ||
3218 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3219 ((c >= 0x370) && (c <= 0x37D)) ||
3220 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3221 ((c >= 0x200C) && (c <= 0x200D)) ||
3222 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3223 ((c >= 0x2070) && (c <= 0x218F)) ||
3224 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3225 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3226 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3227 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3228 ((c >= 0x10000) && (c <= 0xEFFFF))))
3229 return(1);
3230 } else {
3231 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3232 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003233 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003234 (IS_COMBINING(c)) ||
3235 (IS_EXTENDER(c)))
3236 return(1);
3237 }
3238 return(0);
3239}
3240
Daniel Veillarde57ec792003-09-10 10:50:59 +00003241static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003242 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003243
Daniel Veillard34e3f642008-07-29 09:02:27 +00003244static const xmlChar *
3245xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3246 int len = 0, l;
3247 int c;
3248 int count = 0;
3249
Daniel Veillardc6561462009-03-25 10:22:31 +00003250#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003251 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003252#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003253
3254 /*
3255 * Handler for more complex cases
3256 */
3257 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003258 if (ctxt->instate == XML_PARSER_EOF)
3259 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003260 c = CUR_CHAR(l);
3261 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3262 /*
3263 * Use the new checks of production [4] [4a] amd [5] of the
3264 * Update 5 of XML-1.0
3265 */
3266 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3267 (!(((c >= 'a') && (c <= 'z')) ||
3268 ((c >= 'A') && (c <= 'Z')) ||
3269 (c == '_') || (c == ':') ||
3270 ((c >= 0xC0) && (c <= 0xD6)) ||
3271 ((c >= 0xD8) && (c <= 0xF6)) ||
3272 ((c >= 0xF8) && (c <= 0x2FF)) ||
3273 ((c >= 0x370) && (c <= 0x37D)) ||
3274 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3275 ((c >= 0x200C) && (c <= 0x200D)) ||
3276 ((c >= 0x2070) && (c <= 0x218F)) ||
3277 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3278 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3279 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3280 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3281 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3282 return(NULL);
3283 }
3284 len += l;
3285 NEXTL(l);
3286 c = CUR_CHAR(l);
3287 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3288 (((c >= 'a') && (c <= 'z')) ||
3289 ((c >= 'A') && (c <= 'Z')) ||
3290 ((c >= '0') && (c <= '9')) || /* !start */
3291 (c == '_') || (c == ':') ||
3292 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3293 ((c >= 0xC0) && (c <= 0xD6)) ||
3294 ((c >= 0xD8) && (c <= 0xF6)) ||
3295 ((c >= 0xF8) && (c <= 0x2FF)) ||
3296 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3297 ((c >= 0x370) && (c <= 0x37D)) ||
3298 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3299 ((c >= 0x200C) && (c <= 0x200D)) ||
3300 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3301 ((c >= 0x2070) && (c <= 0x218F)) ||
3302 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3303 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3304 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3305 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3306 ((c >= 0x10000) && (c <= 0xEFFFF))
3307 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003308 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003309 count = 0;
3310 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003311 if (ctxt->instate == XML_PARSER_EOF)
3312 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003313 }
3314 len += l;
3315 NEXTL(l);
3316 c = CUR_CHAR(l);
3317 }
3318 } else {
3319 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3320 (!IS_LETTER(c) && (c != '_') &&
3321 (c != ':'))) {
3322 return(NULL);
3323 }
3324 len += l;
3325 NEXTL(l);
3326 c = CUR_CHAR(l);
3327
3328 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3329 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3330 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003331 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332 (IS_COMBINING(c)) ||
3333 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003334 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003335 count = 0;
3336 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003337 if (ctxt->instate == XML_PARSER_EOF)
3338 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003339 }
3340 len += l;
3341 NEXTL(l);
3342 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003343 if (c == 0) {
3344 count = 0;
3345 GROW;
3346 if (ctxt->instate == XML_PARSER_EOF)
3347 return(NULL);
3348 c = CUR_CHAR(l);
3349 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003350 }
3351 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003352 if ((len > XML_MAX_NAME_LENGTH) &&
3353 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3354 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3355 return(NULL);
3356 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003357 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3358 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3359 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3360}
3361
Owen Taylor3473f882001-02-23 17:55:21 +00003362/**
3363 * xmlParseName:
3364 * @ctxt: an XML parser context
3365 *
3366 * parse an XML name.
3367 *
3368 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3369 * CombiningChar | Extender
3370 *
3371 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3372 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003373 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003374 *
3375 * Returns the Name parsed or NULL
3376 */
3377
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003378const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003379xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003380 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003381 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003382 int count = 0;
3383
3384 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003385
Daniel Veillardc6561462009-03-25 10:22:31 +00003386#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003387 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003388#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003389
Daniel Veillard48b2f892001-02-25 16:11:03 +00003390 /*
3391 * Accelerator for simple ASCII names
3392 */
3393 in = ctxt->input->cur;
3394 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3395 ((*in >= 0x41) && (*in <= 0x5A)) ||
3396 (*in == '_') || (*in == ':')) {
3397 in++;
3398 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3399 ((*in >= 0x41) && (*in <= 0x5A)) ||
3400 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003401 (*in == '_') || (*in == '-') ||
3402 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003403 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003404 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003405 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003406 if ((count > XML_MAX_NAME_LENGTH) &&
3407 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3408 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3409 return(NULL);
3410 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003411 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003412 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003413 ctxt->nbChars += count;
3414 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003415 if (ret == NULL)
3416 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003417 return(ret);
3418 }
3419 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003421 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003422}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003423
Daniel Veillard34e3f642008-07-29 09:02:27 +00003424static const xmlChar *
3425xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3426 int len = 0, l;
3427 int c;
3428 int count = 0;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003429 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
Daniel Veillard34e3f642008-07-29 09:02:27 +00003430
Daniel Veillardc6561462009-03-25 10:22:31 +00003431#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003433#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003434
3435 /*
3436 * Handler for more complex cases
3437 */
3438 GROW;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003439 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003440 c = CUR_CHAR(l);
3441 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3442 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3443 return(NULL);
3444 }
3445
3446 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3447 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003448 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003449 if ((len > XML_MAX_NAME_LENGTH) &&
3450 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3451 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452 return(NULL);
3453 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003454 count = 0;
3455 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003456 if (ctxt->instate == XML_PARSER_EOF)
3457 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003458 }
3459 len += l;
3460 NEXTL(l);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003461 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003462 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003463 if (c == 0) {
3464 count = 0;
3465 GROW;
3466 if (ctxt->instate == XML_PARSER_EOF)
3467 return(NULL);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003468 end = ctxt->input->cur;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003469 c = CUR_CHAR(l);
3470 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003471 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003472 if ((len > XML_MAX_NAME_LENGTH) &&
3473 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3474 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3475 return(NULL);
3476 }
Daniel Veillarddcc19502013-05-22 22:56:45 +02003477 return(xmlDictLookup(ctxt->dict, end - len, len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478}
3479
3480/**
3481 * xmlParseNCName:
3482 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003483 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003484 *
3485 * parse an XML name.
3486 *
3487 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3488 * CombiningChar | Extender
3489 *
3490 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3491 *
3492 * Returns the Name parsed or NULL
3493 */
3494
3495static const xmlChar *
3496xmlParseNCName(xmlParserCtxtPtr ctxt) {
3497 const xmlChar *in;
3498 const xmlChar *ret;
3499 int count = 0;
3500
Daniel Veillardc6561462009-03-25 10:22:31 +00003501#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003502 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003503#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504
3505 /*
3506 * Accelerator for simple ASCII names
3507 */
3508 in = ctxt->input->cur;
3509 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3510 ((*in >= 0x41) && (*in <= 0x5A)) ||
3511 (*in == '_')) {
3512 in++;
3513 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3514 ((*in >= 0x41) && (*in <= 0x5A)) ||
3515 ((*in >= 0x30) && (*in <= 0x39)) ||
3516 (*in == '_') || (*in == '-') ||
3517 (*in == '.'))
3518 in++;
3519 if ((*in > 0) && (*in < 0x80)) {
3520 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003521 if ((count > XML_MAX_NAME_LENGTH) &&
3522 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3523 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3524 return(NULL);
3525 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003526 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3527 ctxt->input->cur = in;
3528 ctxt->nbChars += count;
3529 ctxt->input->col += count;
3530 if (ret == NULL) {
3531 xmlErrMemory(ctxt, NULL);
3532 }
3533 return(ret);
3534 }
3535 }
3536 return(xmlParseNCNameComplex(ctxt));
3537}
3538
Daniel Veillard46de64e2002-05-29 08:21:33 +00003539/**
3540 * xmlParseNameAndCompare:
3541 * @ctxt: an XML parser context
3542 *
3543 * parse an XML name and compares for match
3544 * (specialized for endtag parsing)
3545 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003546 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3547 * and the name for mismatch
3548 */
3549
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003550static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003551xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003552 register const xmlChar *cmp = other;
3553 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003554 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003555
3556 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003557 if (ctxt->instate == XML_PARSER_EOF)
3558 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003559
Daniel Veillard46de64e2002-05-29 08:21:33 +00003560 in = ctxt->input->cur;
3561 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003562 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003563 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003564 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003565 }
William M. Brack76e95df2003-10-18 16:20:14 +00003566 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003567 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003568 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003569 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003570 }
3571 /* failure (or end of input buffer), check with full function */
3572 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003573 /* strings coming from the dictionnary direct compare possible */
3574 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003575 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003576 }
3577 return ret;
3578}
3579
Owen Taylor3473f882001-02-23 17:55:21 +00003580/**
3581 * xmlParseStringName:
3582 * @ctxt: an XML parser context
3583 * @str: a pointer to the string pointer (IN/OUT)
3584 *
3585 * parse an XML name.
3586 *
3587 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3588 * CombiningChar | Extender
3589 *
3590 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3591 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003592 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003593 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003594 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003595 * is updated to the current location in the string.
3596 */
3597
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003598static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003599xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3600 xmlChar buf[XML_MAX_NAMELEN + 5];
3601 const xmlChar *cur = *str;
3602 int len = 0, l;
3603 int c;
3604
Daniel Veillardc6561462009-03-25 10:22:31 +00003605#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003606 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003607#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003608
Owen Taylor3473f882001-02-23 17:55:21 +00003609 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003610 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003611 return(NULL);
3612 }
3613
Daniel Veillard34e3f642008-07-29 09:02:27 +00003614 COPY_BUF(l,buf,len,c);
3615 cur += l;
3616 c = CUR_SCHAR(cur, l);
3617 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003618 COPY_BUF(l,buf,len,c);
3619 cur += l;
3620 c = CUR_SCHAR(cur, l);
3621 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3622 /*
3623 * Okay someone managed to make a huge name, so he's ready to pay
3624 * for the processing speed.
3625 */
3626 xmlChar *buffer;
3627 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003628
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003629 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003631 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003632 return(NULL);
3633 }
3634 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003635 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003637 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003638
3639 if ((len > XML_MAX_NAME_LENGTH) &&
3640 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3641 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642 xmlFree(buffer);
3643 return(NULL);
3644 }
Owen Taylor3473f882001-02-23 17:55:21 +00003645 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003646 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003647 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003648 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003649 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003650 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003651 return(NULL);
3652 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003653 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003654 }
3655 COPY_BUF(l,buffer,len,c);
3656 cur += l;
3657 c = CUR_SCHAR(cur, l);
3658 }
3659 buffer[len] = 0;
3660 *str = cur;
3661 return(buffer);
3662 }
3663 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003664 if ((len > XML_MAX_NAME_LENGTH) &&
3665 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3666 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3667 return(NULL);
3668 }
Owen Taylor3473f882001-02-23 17:55:21 +00003669 *str = cur;
3670 return(xmlStrndup(buf, len));
3671}
3672
3673/**
3674 * xmlParseNmtoken:
3675 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003676 *
Owen Taylor3473f882001-02-23 17:55:21 +00003677 * parse an XML Nmtoken.
3678 *
3679 * [7] Nmtoken ::= (NameChar)+
3680 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003681 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003682 *
3683 * Returns the Nmtoken parsed or NULL
3684 */
3685
3686xmlChar *
3687xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688 xmlChar buf[XML_MAX_NAMELEN + 5];
3689 int len = 0, l;
3690 int c;
3691 int count = 0;
3692
Daniel Veillardc6561462009-03-25 10:22:31 +00003693#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003694 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003695#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003696
Owen Taylor3473f882001-02-23 17:55:21 +00003697 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003698 if (ctxt->instate == XML_PARSER_EOF)
3699 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003700 c = CUR_CHAR(l);
3701
Daniel Veillard34e3f642008-07-29 09:02:27 +00003702 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003703 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003704 count = 0;
3705 GROW;
3706 }
3707 COPY_BUF(l,buf,len,c);
3708 NEXTL(l);
3709 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003710 if (c == 0) {
3711 count = 0;
3712 GROW;
3713 if (ctxt->instate == XML_PARSER_EOF)
3714 return(NULL);
3715 c = CUR_CHAR(l);
3716 }
Owen Taylor3473f882001-02-23 17:55:21 +00003717 if (len >= XML_MAX_NAMELEN) {
3718 /*
3719 * Okay someone managed to make a huge token, so he's ready to pay
3720 * for the processing speed.
3721 */
3722 xmlChar *buffer;
3723 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003724
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003725 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003726 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003727 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003728 return(NULL);
3729 }
3730 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003731 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003732 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003733 count = 0;
3734 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003735 if (ctxt->instate == XML_PARSER_EOF) {
3736 xmlFree(buffer);
3737 return(NULL);
3738 }
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003741 xmlChar *tmp;
3742
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003743 if ((max > XML_MAX_NAME_LENGTH) &&
3744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3745 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3746 xmlFree(buffer);
3747 return(NULL);
3748 }
Owen Taylor3473f882001-02-23 17:55:21 +00003749 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003750 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003751 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003752 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003753 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003754 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003755 return(NULL);
3756 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003757 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003758 }
3759 COPY_BUF(l,buffer,len,c);
3760 NEXTL(l);
3761 c = CUR_CHAR(l);
3762 }
3763 buffer[len] = 0;
3764 return(buffer);
3765 }
3766 }
3767 if (len == 0)
3768 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003769 if ((len > XML_MAX_NAME_LENGTH) &&
3770 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3771 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772 return(NULL);
3773 }
Owen Taylor3473f882001-02-23 17:55:21 +00003774 return(xmlStrndup(buf, len));
3775}
3776
3777/**
3778 * xmlParseEntityValue:
3779 * @ctxt: an XML parser context
3780 * @orig: if non-NULL store a copy of the original entity value
3781 *
3782 * parse a value for ENTITY declarations
3783 *
3784 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3785 * "'" ([^%&'] | PEReference | Reference)* "'"
3786 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003787 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003788 */
3789
3790xmlChar *
3791xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3792 xmlChar *buf = NULL;
3793 int len = 0;
3794 int size = XML_PARSER_BUFFER_SIZE;
3795 int c, l;
3796 xmlChar stop;
3797 xmlChar *ret = NULL;
3798 const xmlChar *cur = NULL;
3799 xmlParserInputPtr input;
3800
3801 if (RAW == '"') stop = '"';
3802 else if (RAW == '\'') stop = '\'';
3803 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003804 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003805 return(NULL);
3806 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003807 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003808 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003810 return(NULL);
3811 }
3812
3813 /*
3814 * The content of the entity definition is copied in a buffer.
3815 */
3816
3817 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3818 input = ctxt->input;
3819 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003820 if (ctxt->instate == XML_PARSER_EOF) {
3821 xmlFree(buf);
3822 return(NULL);
3823 }
Owen Taylor3473f882001-02-23 17:55:21 +00003824 NEXT;
3825 c = CUR_CHAR(l);
3826 /*
3827 * NOTE: 4.4.5 Included in Literal
3828 * When a parameter entity reference appears in a literal entity
3829 * value, ... a single or double quote character in the replacement
3830 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003831 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003832 * In practice it means we stop the loop only when back at parsing
3833 * the initial entity and the quote is found
3834 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003835 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3836 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003837 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003838 xmlChar *tmp;
3839
Owen Taylor3473f882001-02-23 17:55:21 +00003840 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003841 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3842 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003843 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003844 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003845 return(NULL);
3846 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003847 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003848 }
3849 COPY_BUF(l,buf,len,c);
3850 NEXTL(l);
3851 /*
3852 * Pop-up of finished entities.
3853 */
3854 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3855 xmlPopInput(ctxt);
3856
3857 GROW;
3858 c = CUR_CHAR(l);
3859 if (c == 0) {
3860 GROW;
3861 c = CUR_CHAR(l);
3862 }
3863 }
3864 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003865 if (ctxt->instate == XML_PARSER_EOF) {
3866 xmlFree(buf);
3867 return(NULL);
3868 }
Owen Taylor3473f882001-02-23 17:55:21 +00003869
3870 /*
3871 * Raise problem w.r.t. '&' and '%' being used in non-entities
3872 * reference constructs. Note Charref will be handled in
3873 * xmlStringDecodeEntities()
3874 */
3875 cur = buf;
3876 while (*cur != 0) { /* non input consuming */
3877 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3878 xmlChar *name;
3879 xmlChar tmp = *cur;
3880
3881 cur++;
3882 name = xmlParseStringName(ctxt, &cur);
3883 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003884 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003885 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003886 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003887 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003888 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3889 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003890 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003891 }
3892 if (name != NULL)
3893 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003894 if (*cur == 0)
3895 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003896 }
3897 cur++;
3898 }
3899
3900 /*
3901 * Then PEReference entities are substituted.
3902 */
3903 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003904 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003905 xmlFree(buf);
3906 } else {
3907 NEXT;
3908 /*
3909 * NOTE: 4.4.7 Bypassed
3910 * When a general entity reference appears in the EntityValue in
3911 * an entity declaration, it is bypassed and left as is.
3912 * so XML_SUBSTITUTE_REF is not set here.
3913 */
3914 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3915 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003916 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003917 *orig = buf;
3918 else
3919 xmlFree(buf);
3920 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003921
Owen Taylor3473f882001-02-23 17:55:21 +00003922 return(ret);
3923}
3924
3925/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003926 * xmlParseAttValueComplex:
3927 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003928 * @attlen: if non-NULL, receives the resulting attribute length
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003929 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003930 *
3931 * parse a value for an attribute, this is the fallback function
3932 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003933 * of non-ASCII characters, or normalization compaction.
 *
 * Every whitespace character (0x20, 0xD, 0xA, 0x9) read from the input is
 * stored as 0x20. When @normalize is non-zero, leading whitespace is
 * skipped, runs of whitespace are reduced to a single space and trailing
 * spaces are trimmed.
 *
 * The LT_IN_ATTRIBUTE, INVALID_CHAR and ATTRIBUTE_NOT_FINISHED diagnostics
 * raised after the main loop do not discard the buffer: the value gathered
 * so far is still returned in those cases.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003934 *
3935 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3936 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003937static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003938xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003939 xmlChar limit = 0;
3940 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003941 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003942 size_t len = 0;
3943 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003944 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003945 xmlChar *current = NULL;
3946 xmlEntityPtr ent;
3947
/* The value must open with a quote; limit remembers which one closes it. */
Owen Taylor3473f882001-02-23 17:55:21 +00003948 if (NXT(0) == '"') {
3949 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3950 limit = '"';
3951 NEXT;
3952 } else if (NXT(0) == '\'') {
3953 limit = '\'';
3954 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3955 NEXT;
3956 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003957 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003958 return(NULL);
3959 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003960
Owen Taylor3473f882001-02-23 17:55:21 +00003961 /*
3962 * allocate a translation buffer.
3963 */
3964 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003965 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003966 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003967
3968 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003969 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003970 */
3971 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003972 while (((NXT(0) != limit) && /* checked */
3973 (IS_CHAR(c)) && (c != '<')) &&
3974 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003975 /*
3976 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3977 * special option is given
3978 */
3979 if ((len > XML_MAX_TEXT_LENGTH) &&
3980 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3981 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003982 "AttValue length too long\n");
/*
 * NOTE(review): this is a length-limit failure, yet control goes to
 * mem_error, which additionally calls xmlErrMemory() before the common
 * cleanup - confirm that the second, memory-related report is intended.
 */
Daniel Veillarde17db992012-07-19 11:25:16 +08003983 goto mem_error;
3984 }
Owen Taylor3473f882001-02-23 17:55:21 +00003985 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003986 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003987 in_space = 0;
/*
 * An ampersand followed by a hash sign is a character reference,
 * anything else is handled as an entity reference.
 */
Owen Taylor3473f882001-02-23 17:55:21 +00003988 if (NXT(1) == '#') {
3989 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003990
/*
 * A reference to the ampersand itself is stored as a bare ampersand
 * when entities are replaced, and as the five bytes of its numeric
 * reference (ampersand, hash, 3, 8, semicolon) otherwise.
 */
Owen Taylor3473f882001-02-23 17:55:21 +00003991 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003992 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003993 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003994 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003995 }
3996 buf[len++] = '&';
3997 } else {
3998 /*
3999 * The reparsing will be done in xmlStringGetNodeList()
4000 * called by the attribute() function in SAX.c
4001 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004002 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004003 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004004 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004005 buf[len++] = '&';
4006 buf[len++] = '#';
4007 buf[len++] = '3';
4008 buf[len++] = '8';
4009 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004010 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004011 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004012 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004013 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004014 }
Owen Taylor3473f882001-02-23 17:55:21 +00004015 len += xmlCopyChar(0, &buf[len], val);
4016 }
4017 } else {
4018 ent = xmlParseEntityRef(ctxt);
/* Every entity reference is counted, plus ent->owner when resolved. */
Daniel Veillardcba68392008-08-29 12:43:40 +00004019 ctxt->nbentities++;
4020 if (ent != NULL)
4021 ctxt->nbentities += ent->owner;
/*
 * Predefined entity: copy the first byte of its content, writing the
 * ampersand back as its numeric reference when entities are not replaced.
 */
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004022 if ((ent != NULL) &&
4023 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004024 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004025 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004026 }
4027 if ((ctxt->replaceEntities == 0) &&
4028 (ent->content[0] == '&')) {
4029 buf[len++] = '&';
4030 buf[len++] = '#';
4031 buf[len++] = '3';
4032 buf[len++] = '8';
4033 buf[len++] = ';';
4034 } else {
4035 buf[len++] = ent->content[0];
4036 }
/*
 * Replacement requested: decode the entity content and append it,
 * turning CR, LF and TAB into a space.
 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004037 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004038 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004039 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4040 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004041 XML_SUBSTITUTE_REF,
4042 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 if (rep != NULL) {
4044 current = rep;
4045 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004046 if ((*current == 0xD) || (*current == 0xA) ||
4047 (*current == 0x9)) {
4048 buf[len++] = 0x20;
4049 current++;
4050 } else
4051 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004052 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004053 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004054 }
4055 }
4056 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004057 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004058 }
4059 } else {
/*
 * NOTE(review): predefined entities are already taken by the first
 * branch of this chain, so this else looks unreachable - confirm.
 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004060 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004061 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004062 }
Owen Taylor3473f882001-02-23 17:55:21 +00004063 if (ent->content != NULL)
4064 buf[len++] = ent->content[0];
4065 }
4066 } else if (ent != NULL) {
4067 int i = xmlStrlen(ent->name);
4068 const xmlChar *cur = ent->name;
4069
4070 /*
4071 * This may look absurd but is needed to detect
4072 * entities problems
4073 */
4074 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004075 (ent->content != NULL) && (ent->checked == 0)) {
4076 unsigned long oldnbent = ctxt->nbentities;
4077
Owen Taylor3473f882001-02-23 17:55:21 +00004078 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004079 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004080
/*
 * checked stores twice (entities counted by the decode above, plus one);
 * its low bit is set just below when the expansion contains a less-than
 * sign.
 */
Daniel Veillardcff25462013-03-11 15:57:55 +08004081 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004082 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004083 if (xmlStrchr(rep, '<'))
4084 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004085 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004086 rep = NULL;
4087 }
Owen Taylor3473f882001-02-23 17:55:21 +00004088 }
4089
4090 /*
4091 * Just output the reference
4092 */
/*
 * NOTE(review): the ampersand is stored before the size check that
 * follows; this presumably relies on the spare room left by the earlier
 * len + 10 checks - confirm.
 */
4093 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004094 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004095 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004096 }
4097 for (;i > 0;i--)
4098 buf[len++] = *cur++;
4099 buf[len++] = ';';
4100 }
4101 }
4102 } else {
/*
 * Plain character. Whitespace is stored as 0x20; when normalizing,
 * leading whitespace is skipped and a run yields a single space.
 */
4103 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004104 if ((len != 0) || (!normalize)) {
4105 if ((!normalize) || (!in_space)) {
4106 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004107 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004108 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004109 }
4110 }
4111 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004112 }
4113 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004114 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004115 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004116 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004117 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004118 }
4119 }
4120 NEXTL(l);
4121 }
4122 GROW;
4123 c = CUR_CHAR(l);
4124 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004125 if (ctxt->instate == XML_PARSER_EOF)
4126 goto error;
4127
/* Trailing spaces are dropped when the value ended inside whitespace. */
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004128 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004129 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004130 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004131 buf[len] = 0;
/*
 * Report why the loop stopped when it was not on the closing quote;
 * none of these branches returns, so buf is still handed back below.
 */
Owen Taylor3473f882001-02-23 17:55:21 +00004132 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004133 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004134 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004135 if ((c != 0) && (!IS_CHAR(c))) {
4136 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4137 "invalid character in attribute value\n");
4138 } else {
4139 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4140 "AttValue: ' expected\n");
4141 }
Owen Taylor3473f882001-02-23 17:55:21 +00004142 } else
4143 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004144
4145 /*
4146 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004147 * length more than INT_MAX it is a very reasonable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004148 */
4149 if (len >= INT_MAX) {
4150 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004151 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004152 goto mem_error;
4153 }
4154
4155 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004156 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004157
/*
 * mem_error reports a memory error and falls through to error, which
 * releases buf and any pending rep before returning NULL.
 */
4158mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004159 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004160error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004161 if (buf != NULL)
4162 xmlFree(buf);
4163 if (rep != NULL)
4164 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004165 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004166}
4167
4168/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004169 * xmlParseAttValue:
4170 * @ctxt: an XML parser context
4171 *
4172 * parse a value for an attribute
4173 * Note: the parser won't do substitution of entities here, this
4174 * will be handled later in xmlStringGetNodeList
4175 *
4176 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4177 * "'" ([^<&'] | Reference)* "'"
4178 *
4179 * 3.3.3 Attribute-Value Normalization:
4180 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004181 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004182 * - a character reference is processed by appending the referenced
4183 * character to the attribute value
4184 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004185 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004186 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4187 * appending #x20 to the normalized value, except that only a single
4188 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004189 * parsed entity or the literal entity value of an internal parsed entity
4190 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004191 * If the declared value is not CDATA, then the XML processor must further
4192 * process the normalized attribute value by discarding any leading and
4193 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004194 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004195 * All attributes for which no declaration has been read should be treated
4196 * by a non-validating parser as if declared CDATA.
4197 *
4198 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4199 */
4200
4201
4202xmlChar *
4203xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004204 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004205 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004206}
4207
4208/**
Owen Taylor3473f882001-02-23 17:55:21 +00004209 * xmlParseSystemLiteral:
4210 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004211 *
Owen Taylor3473f882001-02-23 17:55:21 +00004212 * parse an XML Literal
4213 *
4214 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4215 *
4216 * Returns the SystemLiteral parsed or NULL
4217 */
4218
4219xmlChar *
4220xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4221 xmlChar *buf = NULL;
4222 int len = 0;
4223 int size = XML_PARSER_BUFFER_SIZE;
4224 int cur, l;
4225 xmlChar stop;
4226 int state = ctxt->instate;
4227 int count = 0;
4228
4229 SHRINK;
4230 if (RAW == '"') {
4231 NEXT;
4232 stop = '"';
4233 } else if (RAW == '\'') {
4234 NEXT;
4235 stop = '\'';
4236 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004237 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004238 return(NULL);
4239 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004240
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004241 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004242 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004243 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004244 return(NULL);
4245 }
4246 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4247 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004248 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004249 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004250 xmlChar *tmp;
4251
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004252 if ((size > XML_MAX_NAME_LENGTH) &&
4253 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4254 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4255 xmlFree(buf);
4256 ctxt->instate = (xmlParserInputState) state;
4257 return(NULL);
4258 }
Owen Taylor3473f882001-02-23 17:55:21 +00004259 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004260 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4261 if (tmp == NULL) {
4262 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004264 ctxt->instate = (xmlParserInputState) state;
4265 return(NULL);
4266 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004267 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 count++;
4270 if (count > 50) {
4271 GROW;
4272 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004273 if (ctxt->instate == XML_PARSER_EOF) {
4274 xmlFree(buf);
4275 return(NULL);
4276 }
Owen Taylor3473f882001-02-23 17:55:21 +00004277 }
4278 COPY_BUF(l,buf,len,cur);
4279 NEXTL(l);
4280 cur = CUR_CHAR(l);
4281 if (cur == 0) {
4282 GROW;
4283 SHRINK;
4284 cur = CUR_CHAR(l);
4285 }
4286 }
4287 buf[len] = 0;
4288 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004289 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 } else {
4292 NEXT;
4293 }
4294 return(buf);
4295}
4296
4297/**
4298 * xmlParsePubidLiteral:
4299 * @ctxt: an XML parser context
4300 *
4301 * parse an XML public literal
4302 *
4303 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4304 *
4305 * Returns the PubidLiteral parsed or NULL.
4306 */
4307
4308xmlChar *
4309xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4310 xmlChar *buf = NULL;
4311 int len = 0;
4312 int size = XML_PARSER_BUFFER_SIZE;
4313 xmlChar cur;
4314 xmlChar stop;
4315 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004316 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004317
4318 SHRINK;
4319 if (RAW == '"') {
4320 NEXT;
4321 stop = '"';
4322 } else if (RAW == '\'') {
4323 NEXT;
4324 stop = '\'';
4325 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004326 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004327 return(NULL);
4328 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004329 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004330 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 return(NULL);
4333 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004334 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004335 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004336 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004337 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004338 xmlChar *tmp;
4339
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004340 if ((size > XML_MAX_NAME_LENGTH) &&
4341 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4342 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4343 xmlFree(buf);
4344 return(NULL);
4345 }
Owen Taylor3473f882001-02-23 17:55:21 +00004346 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004347 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4348 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004349 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004350 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004351 return(NULL);
4352 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004353 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004354 }
4355 buf[len++] = cur;
4356 count++;
4357 if (count > 50) {
4358 GROW;
4359 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004360 if (ctxt->instate == XML_PARSER_EOF) {
4361 xmlFree(buf);
4362 return(NULL);
4363 }
Owen Taylor3473f882001-02-23 17:55:21 +00004364 }
4365 NEXT;
4366 cur = CUR;
4367 if (cur == 0) {
4368 GROW;
4369 SHRINK;
4370 cur = CUR;
4371 }
4372 }
4373 buf[len] = 0;
4374 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004375 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004376 } else {
4377 NEXT;
4378 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004379 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004380 return(buf);
4381}
4382
Daniel Veillard8ed10722009-08-20 19:17:36 +02004383static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004384
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. Entry b holds b itself for TAB (0x09) and for the bytes
 * 0x20 - 0x7F other than '&' (0x26), '<' (0x3C) and ']' (0x5D); every
 * other entry, including LF, CR and all non-ascii bytes, is 0x00.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00 - 0x07 */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08 - 0x0F: TAB only, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10 - 0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18 - 0x1F */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* 0x20 - 0x27: no & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x28 - 0x2F */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30 - 0x37 */
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* 0x38 - 0x3F: no < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40 - 0x47 */
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, /* 0x48 - 0x4F */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50 - 0x57 */
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* 0x58 - 0x5F: no ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60 - 0x67 */
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, /* 0x68 - 0x6F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70 - 0x77 */
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F  /* 0x78 - 0x7F */
    /*
     * 0x80 - 0xFF (non-ascii): no initializers are given, so these 128
     * entries are implicitly initialized to 0x00.
     */
};
4422
Owen Taylor3473f882001-02-23 17:55:21 +00004423/**
4424 * xmlParseCharData:
4425 * @ctxt: an XML parser context
4426 * @cdata: int indicating whether we are within a CDATA section
4427 *
4428 * parse a CharData section.
4429 * if we are within a CDATA section ']]>' marks an end of section.
4430 *
4431 * The right angle bracket (>) may be represented using the string "&gt;",
4432 * and must, for compatibility, be escaped using "&gt;" or a character
4433 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004434 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004435 *
4436 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4437 */
4438
4439void
4440xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004441 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004442 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004443 int line = ctxt->input->line;
4444 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004445 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004446
4447 SHRINK;
4448 GROW;
4449 /*
4450 * Accelerated common case where input don't need to be
4451 * modified before passing it to the handler.
4452 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004453 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004454 in = ctxt->input->cur;
4455 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004456get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004457 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004458 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004459 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004460 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004461 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004462 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004463 goto get_more_space;
4464 }
4465 if (*in == '<') {
4466 nbchar = in - ctxt->input->cur;
4467 if (nbchar > 0) {
4468 const xmlChar *tmp = ctxt->input->cur;
4469 ctxt->input->cur = in;
4470
Daniel Veillard34099b42004-11-04 17:34:35 +00004471 if ((ctxt->sax != NULL) &&
4472 (ctxt->sax->ignorableWhitespace !=
4473 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004474 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004475 if (ctxt->sax->ignorableWhitespace != NULL)
4476 ctxt->sax->ignorableWhitespace(ctxt->userData,
4477 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004478 } else {
4479 if (ctxt->sax->characters != NULL)
4480 ctxt->sax->characters(ctxt->userData,
4481 tmp, nbchar);
4482 if (*ctxt->space == -1)
4483 *ctxt->space = -2;
4484 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004485 } else if ((ctxt->sax != NULL) &&
4486 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004487 ctxt->sax->characters(ctxt->userData,
4488 tmp, nbchar);
4489 }
4490 }
4491 return;
4492 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004493
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004494get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004495 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004496 while (test_char_data[*in]) {
4497 in++;
4498 ccol++;
4499 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004500 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004501 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004502 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004503 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004504 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004505 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004506 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004507 }
4508 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004509 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004510 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004511 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004512 return;
4513 }
4514 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004515 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004516 goto get_more;
4517 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004518 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004519 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004520 if ((ctxt->sax != NULL) &&
4521 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004522 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004523 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004524 const xmlChar *tmp = ctxt->input->cur;
4525 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004526
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004527 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004528 if (ctxt->sax->ignorableWhitespace != NULL)
4529 ctxt->sax->ignorableWhitespace(ctxt->userData,
4530 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004531 } else {
4532 if (ctxt->sax->characters != NULL)
4533 ctxt->sax->characters(ctxt->userData,
4534 tmp, nbchar);
4535 if (*ctxt->space == -1)
4536 *ctxt->space = -2;
4537 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004538 line = ctxt->input->line;
4539 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004540 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004541 if (ctxt->sax->characters != NULL)
4542 ctxt->sax->characters(ctxt->userData,
4543 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004544 line = ctxt->input->line;
4545 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004546 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004547 /* something really bad happened in the SAX callback */
4548 if (ctxt->instate != XML_PARSER_CONTENT)
4549 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004550 }
4551 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004552 if (*in == 0xD) {
4553 in++;
4554 if (*in == 0xA) {
4555 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004556 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004557 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004558 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004559 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004560 in--;
4561 }
4562 if (*in == '<') {
4563 return;
4564 }
4565 if (*in == '&') {
4566 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004567 }
4568 SHRINK;
4569 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004570 if (ctxt->instate == XML_PARSER_EOF)
4571 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004572 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004573 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004574 nbchar = 0;
4575 }
Daniel Veillard50582112001-03-26 22:52:16 +00004576 ctxt->input->line = line;
4577 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004578 xmlParseCharDataComplex(ctxt, cdata);
4579}
4580
Daniel Veillard01c13b52002-12-10 15:19:08 +00004581/**
4582 * xmlParseCharDataComplex:
4583 * @ctxt: an XML parser context
4584 * @cdata: int indicating whether we are within a CDATA section
4585 *
4586 * parse a CharData section.this is the fallback function
4587 * of xmlParseCharData() when the parsing requires handling
4588 * of non-ASCII characters.
4589 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004590static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004591xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004592 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4593 int nbchar = 0;
4594 int cur, l;
4595 int count = 0;
4596
4597 SHRINK;
4598 GROW;
4599 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004600 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004601 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004602 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004603 if ((cur == ']') && (NXT(1) == ']') &&
4604 (NXT(2) == '>')) {
4605 if (cdata) break;
4606 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004607 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004608 }
4609 }
4610 COPY_BUF(l,buf,nbchar,cur);
4611 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004612 buf[nbchar] = 0;
4613
Owen Taylor3473f882001-02-23 17:55:21 +00004614 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004615 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004616 */
4617 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004618 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004619 if (ctxt->sax->ignorableWhitespace != NULL)
4620 ctxt->sax->ignorableWhitespace(ctxt->userData,
4621 buf, nbchar);
4622 } else {
4623 if (ctxt->sax->characters != NULL)
4624 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004625 if ((ctxt->sax->characters !=
4626 ctxt->sax->ignorableWhitespace) &&
4627 (*ctxt->space == -1))
4628 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004629 }
4630 }
4631 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004632 /* something really bad happened in the SAX callback */
4633 if (ctxt->instate != XML_PARSER_CONTENT)
4634 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004635 }
4636 count++;
4637 if (count > 50) {
4638 GROW;
4639 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004640 if (ctxt->instate == XML_PARSER_EOF)
4641 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004642 }
4643 NEXTL(l);
4644 cur = CUR_CHAR(l);
4645 }
4646 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004647 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004648 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004649 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004650 */
4651 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004652 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004653 if (ctxt->sax->ignorableWhitespace != NULL)
4654 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4655 } else {
4656 if (ctxt->sax->characters != NULL)
4657 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004658 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4659 (*ctxt->space == -1))
4660 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 }
4662 }
4663 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004664 if ((cur != 0) && (!IS_CHAR(cur))) {
4665 /* Generate the error and skip the offending character */
4666 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4667 "PCDATA invalid Char value %d\n",
4668 cur);
4669 NEXTL(l);
4670 }
Owen Taylor3473f882001-02-23 17:55:21 +00004671}
4672
4673/**
4674 * xmlParseExternalID:
4675 * @ctxt: an XML parser context
4676 * @publicID: a xmlChar** receiving PubidLiteral
4677 * @strict: indicate whether we should restrict parsing to only
4678 * production [75], see NOTE below
4679 *
4680 * Parse an External ID or a Public ID
4681 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004682 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004683 * 'PUBLIC' S PubidLiteral S SystemLiteral
4684 *
4685 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4686 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4687 *
4688 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4689 *
4690 * Returns the function returns SystemLiteral and in the second
4691 * case publicID receives PubidLiteral, is strict is off
4692 * it is possible to return NULL and have publicID set.
4693 */
4694
4695xmlChar *
4696xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4697 xmlChar *URI = NULL;
4698
4699 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004700
4701 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004702 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004703 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004704 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004705 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4706 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004707 }
4708 SKIP_BLANKS;
4709 URI = xmlParseSystemLiteral(ctxt);
4710 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004711 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004712 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004713 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004714 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004715 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004717 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004718 }
4719 SKIP_BLANKS;
4720 *publicID = xmlParsePubidLiteral(ctxt);
4721 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004722 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
4724 if (strict) {
4725 /*
4726 * We don't handle [83] so "S SystemLiteral" is required.
4727 */
William M. Brack76e95df2003-10-18 16:20:14 +00004728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004730 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004731 }
4732 } else {
4733 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004734 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004735 * "S SystemLiteral" is not detected. From a purely parsing
4736 * point of view that's a nice mess.
4737 */
4738 const xmlChar *ptr;
4739 GROW;
4740
4741 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004742 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004743
William M. Brack76e95df2003-10-18 16:20:14 +00004744 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004745 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4746 }
4747 SKIP_BLANKS;
4748 URI = xmlParseSystemLiteral(ctxt);
4749 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004750 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004751 }
4752 }
4753 return(URI);
4754}
4755
4756/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004757 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004758 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004759 * @buf: the already parsed part of the buffer
4760 * @len: number of bytes filles in the buffer
4761 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004762 *
4763 * Skip an XML (SGML) comment <!-- .... -->
4764 * The spec says that "For compatibility, the string "--" (double-hyphen)
4765 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004766 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004767 *
4768 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4769 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004770static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004771xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4772 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004773 int q, ql;
4774 int r, rl;
4775 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004776 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004777 int inputid;
4778
4779 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004780
Owen Taylor3473f882001-02-23 17:55:21 +00004781 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004782 len = 0;
4783 size = XML_PARSER_BUFFER_SIZE;
4784 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4785 if (buf == NULL) {
4786 xmlErrMemory(ctxt, NULL);
4787 return;
4788 }
Owen Taylor3473f882001-02-23 17:55:21 +00004789 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004790 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004791 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004792 if (q == 0)
4793 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004794 if (!IS_CHAR(q)) {
4795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4796 "xmlParseComment: invalid xmlChar value %d\n",
4797 q);
4798 xmlFree (buf);
4799 return;
4800 }
Owen Taylor3473f882001-02-23 17:55:21 +00004801 NEXTL(ql);
4802 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004803 if (r == 0)
4804 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004805 if (!IS_CHAR(r)) {
4806 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4807 "xmlParseComment: invalid xmlChar value %d\n",
4808 q);
4809 xmlFree (buf);
4810 return;
4811 }
Owen Taylor3473f882001-02-23 17:55:21 +00004812 NEXTL(rl);
4813 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004814 if (cur == 0)
4815 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004816 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004817 ((cur != '>') ||
4818 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004819 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004820 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004821 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004822 if ((len > XML_MAX_TEXT_LENGTH) &&
4823 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4824 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4825 "Comment too big found", NULL);
4826 xmlFree (buf);
4827 return;
4828 }
Owen Taylor3473f882001-02-23 17:55:21 +00004829 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004830 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004831 size_t new_size;
4832
4833 new_size = size * 2;
4834 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004835 if (new_buf == NULL) {
4836 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004837 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004838 return;
4839 }
William M. Bracka3215c72004-07-31 16:24:01 +00004840 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004841 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004842 }
4843 COPY_BUF(ql,buf,len,q);
4844 q = r;
4845 ql = rl;
4846 r = cur;
4847 rl = l;
4848
4849 count++;
4850 if (count > 50) {
4851 GROW;
4852 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004853 if (ctxt->instate == XML_PARSER_EOF) {
4854 xmlFree(buf);
4855 return;
4856 }
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
4858 NEXTL(l);
4859 cur = CUR_CHAR(l);
4860 if (cur == 0) {
4861 SHRINK;
4862 GROW;
4863 cur = CUR_CHAR(l);
4864 }
4865 }
4866 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004867 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004868 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004869 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004870 } else if (!IS_CHAR(cur)) {
4871 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4872 "xmlParseComment: invalid xmlChar value %d\n",
4873 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004874 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004875 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004876 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4877 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004878 }
4879 NEXT;
4880 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4881 (!ctxt->disableSAX))
4882 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 }
Daniel Veillardda629342007-08-01 07:49:06 +00004884 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004885 return;
4886not_terminated:
4887 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4888 "Comment not terminated\n", NULL);
4889 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004890 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004891}
Daniel Veillardda629342007-08-01 07:49:06 +00004892
Daniel Veillard4c778d82005-01-23 17:37:44 +00004893/**
4894 * xmlParseComment:
4895 * @ctxt: an XML parser context
4896 *
4897 * Skip an XML (SGML) comment <!-- .... -->
4898 * The spec says that "For compatibility, the string "--" (double-hyphen)
4899 * must not occur within comments. "
4900 *
4901 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4902 */
4903void
4904xmlParseComment(xmlParserCtxtPtr ctxt) {
4905 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004906 size_t size = XML_PARSER_BUFFER_SIZE;
4907 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004908 xmlParserInputState state;
4909 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004910 size_t nbchar = 0;
4911 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004912 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004913
4914 /*
4915 * Check that there is a comment right here.
4916 */
4917 if ((RAW != '<') || (NXT(1) != '!') ||
4918 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004919 state = ctxt->instate;
4920 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004921 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004922 SKIP(4);
4923 SHRINK;
4924 GROW;
4925
4926 /*
4927 * Accelerated common case where input don't need to be
4928 * modified before passing it to the handler.
4929 */
4930 in = ctxt->input->cur;
4931 do {
4932 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004933 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004934 ctxt->input->line++; ctxt->input->col = 1;
4935 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004936 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004937 }
4938get_more:
4939 ccol = ctxt->input->col;
4940 while (((*in > '-') && (*in <= 0x7F)) ||
4941 ((*in >= 0x20) && (*in < '-')) ||
4942 (*in == 0x09)) {
4943 in++;
4944 ccol++;
4945 }
4946 ctxt->input->col = ccol;
4947 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004948 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004949 ctxt->input->line++; ctxt->input->col = 1;
4950 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004951 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004952 goto get_more;
4953 }
4954 nbchar = in - ctxt->input->cur;
4955 /*
4956 * save current set of data
4957 */
4958 if (nbchar > 0) {
4959 if ((ctxt->sax != NULL) &&
4960 (ctxt->sax->comment != NULL)) {
4961 if (buf == NULL) {
4962 if ((*in == '-') && (in[1] == '-'))
4963 size = nbchar + 1;
4964 else
4965 size = XML_PARSER_BUFFER_SIZE + nbchar;
4966 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4967 if (buf == NULL) {
4968 xmlErrMemory(ctxt, NULL);
4969 ctxt->instate = state;
4970 return;
4971 }
4972 len = 0;
4973 } else if (len + nbchar + 1 >= size) {
4974 xmlChar *new_buf;
4975 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4976 new_buf = (xmlChar *) xmlRealloc(buf,
4977 size * sizeof(xmlChar));
4978 if (new_buf == NULL) {
4979 xmlFree (buf);
4980 xmlErrMemory(ctxt, NULL);
4981 ctxt->instate = state;
4982 return;
4983 }
4984 buf = new_buf;
4985 }
4986 memcpy(&buf[len], ctxt->input->cur, nbchar);
4987 len += nbchar;
4988 buf[len] = 0;
4989 }
4990 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004991 if ((len > XML_MAX_TEXT_LENGTH) &&
4992 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4993 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4994 "Comment too big found", NULL);
4995 xmlFree (buf);
4996 return;
4997 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004998 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004999 if (*in == 0xA) {
5000 in++;
5001 ctxt->input->line++; ctxt->input->col = 1;
5002 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005003 if (*in == 0xD) {
5004 in++;
5005 if (*in == 0xA) {
5006 ctxt->input->cur = in;
5007 in++;
5008 ctxt->input->line++; ctxt->input->col = 1;
5009 continue; /* while */
5010 }
5011 in--;
5012 }
5013 SHRINK;
5014 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005015 if (ctxt->instate == XML_PARSER_EOF) {
5016 xmlFree(buf);
5017 return;
5018 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005019 in = ctxt->input->cur;
5020 if (*in == '-') {
5021 if (in[1] == '-') {
5022 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005023 if (ctxt->input->id != inputid) {
5024 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5025 "comment doesn't start and stop in the same entity\n");
5026 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005027 SKIP(3);
5028 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5029 (!ctxt->disableSAX)) {
5030 if (buf != NULL)
5031 ctxt->sax->comment(ctxt->userData, buf);
5032 else
5033 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5034 }
5035 if (buf != NULL)
5036 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005037 if (ctxt->instate != XML_PARSER_EOF)
5038 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005039 return;
5040 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005041 if (buf != NULL) {
5042 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5043 "Double hyphen within comment: "
5044 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005045 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005046 } else
5047 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5048 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005049 in++;
5050 ctxt->input->col++;
5051 }
5052 in++;
5053 ctxt->input->col++;
5054 goto get_more;
5055 }
5056 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5057 xmlParseCommentComplex(ctxt, buf, len, size);
5058 ctxt->instate = state;
5059 return;
5060}
5061
Owen Taylor3473f882001-02-23 17:55:21 +00005062
5063/**
5064 * xmlParsePITarget:
5065 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005066 *
Owen Taylor3473f882001-02-23 17:55:21 +00005067 * parse the name of a PI
5068 *
5069 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5070 *
5071 * Returns the PITarget name or NULL
5072 */
5073
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005074const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005075xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005076 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005077
5078 name = xmlParseName(ctxt);
5079 if ((name != NULL) &&
5080 ((name[0] == 'x') || (name[0] == 'X')) &&
5081 ((name[1] == 'm') || (name[1] == 'M')) &&
5082 ((name[2] == 'l') || (name[2] == 'L'))) {
5083 int i;
5084 if ((name[0] == 'x') && (name[1] == 'm') &&
5085 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005086 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005087 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005088 return(name);
5089 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005090 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005091 return(name);
5092 }
5093 for (i = 0;;i++) {
5094 if (xmlW3CPIs[i] == NULL) break;
5095 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5096 return(name);
5097 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005098 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5099 "xmlParsePITarget: invalid name prefix 'xml'\n",
5100 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005101 }
Daniel Veillard37334572008-07-31 08:20:02 +00005102 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005103 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005104 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005105 }
Owen Taylor3473f882001-02-23 17:55:21 +00005106 return(name);
5107}
5108
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A value that does not have the form catalog = "URL" (either quote
 * character) produces a warning, except that a missing '=' makes the
 * function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5170
Owen Taylor3473f882001-02-23 17:55:21 +00005171/**
5172 * xmlParsePI:
5173 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005174 *
Owen Taylor3473f882001-02-23 17:55:21 +00005175 * parse an XML Processing Instruction.
5176 *
5177 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5178 *
5179 * The processing is transfered to SAX once parsed.
5180 */
5181
5182void
5183xmlParsePI(xmlParserCtxtPtr ctxt) {
5184 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005185 size_t len = 0;
5186 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005187 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005188 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005189 xmlParserInputState state;
5190 int count = 0;
5191
5192 if ((RAW == '<') && (NXT(1) == '?')) {
5193 xmlParserInputPtr input = ctxt->input;
5194 state = ctxt->instate;
5195 ctxt->instate = XML_PARSER_PI;
5196 /*
5197 * this is a Processing Instruction.
5198 */
5199 SKIP(2);
5200 SHRINK;
5201
5202 /*
5203 * Parse the target name and check for special support like
5204 * namespace.
5205 */
5206 target = xmlParsePITarget(ctxt);
5207 if (target != NULL) {
5208 if ((RAW == '?') && (NXT(1) == '>')) {
5209 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005210 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5211 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005212 }
5213 SKIP(2);
5214
5215 /*
5216 * SAX: PI detected.
5217 */
5218 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5219 (ctxt->sax->processingInstruction != NULL))
5220 ctxt->sax->processingInstruction(ctxt->userData,
5221 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005222 if (ctxt->instate != XML_PARSER_EOF)
5223 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005224 return;
5225 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005226 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005227 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005228 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 ctxt->instate = state;
5230 return;
5231 }
5232 cur = CUR;
5233 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005234 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5235 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005236 }
5237 SKIP_BLANKS;
5238 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005239 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005240 ((cur != '?') || (NXT(1) != '>'))) {
5241 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005242 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005243 size_t new_size = size * 2;
5244 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005245 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005246 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005247 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005248 ctxt->instate = state;
5249 return;
5250 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005251 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005252 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005253 }
5254 count++;
5255 if (count > 50) {
5256 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005257 if (ctxt->instate == XML_PARSER_EOF) {
5258 xmlFree(buf);
5259 return;
5260 }
Owen Taylor3473f882001-02-23 17:55:21 +00005261 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005262 if ((len > XML_MAX_TEXT_LENGTH) &&
5263 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5264 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5265 "PI %s too big found", target);
5266 xmlFree(buf);
5267 ctxt->instate = state;
5268 return;
5269 }
Owen Taylor3473f882001-02-23 17:55:21 +00005270 }
5271 COPY_BUF(l,buf,len,cur);
5272 NEXTL(l);
5273 cur = CUR_CHAR(l);
5274 if (cur == 0) {
5275 SHRINK;
5276 GROW;
5277 cur = CUR_CHAR(l);
5278 }
5279 }
Daniel Veillard51304812012-07-19 20:34:26 +08005280 if ((len > XML_MAX_TEXT_LENGTH) &&
5281 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5282 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5283 "PI %s too big found", target);
5284 xmlFree(buf);
5285 ctxt->instate = state;
5286 return;
5287 }
Owen Taylor3473f882001-02-23 17:55:21 +00005288 buf[len] = 0;
5289 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005290 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5291 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 } else {
5293 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005294 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5295 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005296 }
5297 SKIP(2);
5298
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005299#ifdef LIBXML_CATALOG_ENABLED
5300 if (((state == XML_PARSER_MISC) ||
5301 (state == XML_PARSER_START)) &&
5302 (xmlStrEqual(target, XML_CATALOG_PI))) {
5303 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5304 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5305 (allow == XML_CATA_ALLOW_ALL))
5306 xmlParseCatalogPI(ctxt, buf);
5307 }
5308#endif
5309
5310
Owen Taylor3473f882001-02-23 17:55:21 +00005311 /*
5312 * SAX: PI detected.
5313 */
5314 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5315 (ctxt->sax->processingInstruction != NULL))
5316 ctxt->sax->processingInstruction(ctxt->userData,
5317 target, buf);
5318 }
5319 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005320 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005321 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005322 }
Chris Evans77404b82011-12-14 16:18:25 +08005323 if (ctxt->instate != XML_PARSER_EOF)
5324 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005325 }
5326}
5327
5328/**
5329 * xmlParseNotationDecl:
5330 * @ctxt: an XML parser context
5331 *
5332 * parse a notation declaration
5333 *
5334 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5335 *
5336 * Hence there is actually 3 choices:
5337 * 'PUBLIC' S PubidLiteral
5338 * 'PUBLIC' S PubidLiteral S SystemLiteral
5339 * and 'SYSTEM' S SystemLiteral
5340 *
5341 * See the NOTE on xmlParseExternalID().
5342 */
5343
5344void
5345xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005346 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005347 xmlChar *Pubid;
5348 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005349
Daniel Veillarda07050d2003-10-19 14:46:32 +00005350 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005351 xmlParserInputPtr input = ctxt->input;
5352 SHRINK;
5353 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005354 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005355 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5356 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005357 return;
5358 }
5359 SKIP_BLANKS;
5360
Daniel Veillard76d66f42001-05-16 21:05:17 +00005361 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005362 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005363 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005364 return;
5365 }
William M. Brack76e95df2003-10-18 16:20:14 +00005366 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005368 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005369 return;
5370 }
Daniel Veillard37334572008-07-31 08:20:02 +00005371 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005372 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005373 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005374 name, NULL, NULL);
5375 }
Owen Taylor3473f882001-02-23 17:55:21 +00005376 SKIP_BLANKS;
5377
5378 /*
5379 * Parse the IDs.
5380 */
5381 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5382 SKIP_BLANKS;
5383
5384 if (RAW == '>') {
5385 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005386 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5387 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005388 }
5389 NEXT;
5390 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5391 (ctxt->sax->notationDecl != NULL))
5392 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5393 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005394 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 }
Owen Taylor3473f882001-02-23 17:55:21 +00005396 if (Systemid != NULL) xmlFree(Systemid);
5397 if (Pubid != NULL) xmlFree(Pubid);
5398 }
5399}
5400
5401/**
5402 * xmlParseEntityDecl:
5403 * @ctxt: an XML parser context
5404 *
5405 * parse <!ENTITY declarations
5406 *
5407 * [70] EntityDecl ::= GEDecl | PEDecl
5408 *
5409 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5410 *
5411 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5412 *
5413 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5414 *
5415 * [74] PEDef ::= EntityValue | ExternalID
5416 *
5417 * [76] NDataDecl ::= S 'NDATA' S Name
5418 *
5419 * [ VC: Notation Declared ]
5420 * The Name must match the declared name of a notation.
5421 */
5422
5423void
5424xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005425 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005426 xmlChar *value = NULL;
5427 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005428 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005429 int isParameter = 0;
5430 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005431 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005432
Daniel Veillard4c778d82005-01-23 17:37:44 +00005433 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005434 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005435 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005436 SHRINK;
5437 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005438 skipped = SKIP_BLANKS;
5439 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005440 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5441 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005442 }
Owen Taylor3473f882001-02-23 17:55:21 +00005443
5444 if (RAW == '%') {
5445 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005446 skipped = SKIP_BLANKS;
5447 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005450 }
Owen Taylor3473f882001-02-23 17:55:21 +00005451 isParameter = 1;
5452 }
5453
Daniel Veillard76d66f42001-05-16 21:05:17 +00005454 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005455 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005456 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5457 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005458 return;
5459 }
Daniel Veillard37334572008-07-31 08:20:02 +00005460 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005461 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005462 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005463 name, NULL, NULL);
5464 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005465 skipped = SKIP_BLANKS;
5466 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005469 }
Owen Taylor3473f882001-02-23 17:55:21 +00005470
Daniel Veillardf5582f12002-06-11 10:08:16 +00005471 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 /*
5473 * handle the various case of definitions...
5474 */
5475 if (isParameter) {
5476 if ((RAW == '"') || (RAW == '\'')) {
5477 value = xmlParseEntityValue(ctxt, &orig);
5478 if (value) {
5479 if ((ctxt->sax != NULL) &&
5480 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5481 ctxt->sax->entityDecl(ctxt->userData, name,
5482 XML_INTERNAL_PARAMETER_ENTITY,
5483 NULL, NULL, value);
5484 }
5485 } else {
5486 URI = xmlParseExternalID(ctxt, &literal, 1);
5487 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005488 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005489 }
5490 if (URI) {
5491 xmlURIPtr uri;
5492
5493 uri = xmlParseURI((const char *) URI);
5494 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005495 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5496 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005497 /*
5498 * This really ought to be a well formedness error
5499 * but the XML Core WG decided otherwise c.f. issue
5500 * E26 of the XML erratas.
5501 */
Owen Taylor3473f882001-02-23 17:55:21 +00005502 } else {
5503 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005504 /*
5505 * Okay this is foolish to block those but not
5506 * invalid URIs.
5507 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005508 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005509 } else {
5510 if ((ctxt->sax != NULL) &&
5511 (!ctxt->disableSAX) &&
5512 (ctxt->sax->entityDecl != NULL))
5513 ctxt->sax->entityDecl(ctxt->userData, name,
5514 XML_EXTERNAL_PARAMETER_ENTITY,
5515 literal, URI, NULL);
5516 }
5517 xmlFreeURI(uri);
5518 }
5519 }
5520 }
5521 } else {
5522 if ((RAW == '"') || (RAW == '\'')) {
5523 value = xmlParseEntityValue(ctxt, &orig);
5524 if ((ctxt->sax != NULL) &&
5525 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5526 ctxt->sax->entityDecl(ctxt->userData, name,
5527 XML_INTERNAL_GENERAL_ENTITY,
5528 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005529 /*
5530 * For expat compatibility in SAX mode.
5531 */
5532 if ((ctxt->myDoc == NULL) ||
5533 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5534 if (ctxt->myDoc == NULL) {
5535 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005536 if (ctxt->myDoc == NULL) {
5537 xmlErrMemory(ctxt, "New Doc failed");
5538 return;
5539 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005540 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005541 }
5542 if (ctxt->myDoc->intSubset == NULL)
5543 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5544 BAD_CAST "fake", NULL, NULL);
5545
Daniel Veillard1af9a412003-08-20 22:54:39 +00005546 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5547 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005548 }
Owen Taylor3473f882001-02-23 17:55:21 +00005549 } else {
5550 URI = xmlParseExternalID(ctxt, &literal, 1);
5551 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005552 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005553 }
5554 if (URI) {
5555 xmlURIPtr uri;
5556
5557 uri = xmlParseURI((const char *)URI);
5558 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005559 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5560 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005561 /*
5562 * This really ought to be a well formedness error
5563 * but the XML Core WG decided otherwise c.f. issue
5564 * E26 of the XML erratas.
5565 */
Owen Taylor3473f882001-02-23 17:55:21 +00005566 } else {
5567 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005568 /*
5569 * Okay this is foolish to block those but not
5570 * invalid URIs.
5571 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005572 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005573 }
5574 xmlFreeURI(uri);
5575 }
5576 }
William M. Brack76e95df2003-10-18 16:20:14 +00005577 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005578 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5579 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005580 }
5581 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005582 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005583 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005584 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5586 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005587 }
5588 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005589 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005590 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5591 (ctxt->sax->unparsedEntityDecl != NULL))
5592 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5593 literal, URI, ndata);
5594 } else {
5595 if ((ctxt->sax != NULL) &&
5596 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5597 ctxt->sax->entityDecl(ctxt->userData, name,
5598 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5599 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005600 /*
5601 * For expat compatibility in SAX mode.
5602 * assuming the entity repalcement was asked for
5603 */
5604 if ((ctxt->replaceEntities != 0) &&
5605 ((ctxt->myDoc == NULL) ||
5606 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5607 if (ctxt->myDoc == NULL) {
5608 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005609 if (ctxt->myDoc == NULL) {
5610 xmlErrMemory(ctxt, "New Doc failed");
5611 return;
5612 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005613 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005614 }
5615
5616 if (ctxt->myDoc->intSubset == NULL)
5617 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5618 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005619 xmlSAX2EntityDecl(ctxt, name,
5620 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5621 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005622 }
Owen Taylor3473f882001-02-23 17:55:21 +00005623 }
5624 }
5625 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005626 if (ctxt->instate == XML_PARSER_EOF)
5627 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005628 SKIP_BLANKS;
5629 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005630 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005631 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005632 } else {
5633 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005634 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5635 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005636 }
5637 NEXT;
5638 }
5639 if (orig != NULL) {
5640 /*
5641 * Ugly mechanism to save the raw entity value.
5642 */
5643 xmlEntityPtr cur = NULL;
5644
5645 if (isParameter) {
5646 if ((ctxt->sax != NULL) &&
5647 (ctxt->sax->getParameterEntity != NULL))
5648 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5649 } else {
5650 if ((ctxt->sax != NULL) &&
5651 (ctxt->sax->getEntity != NULL))
5652 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005653 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005654 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005655 }
Owen Taylor3473f882001-02-23 17:55:21 +00005656 }
5657 if (cur != NULL) {
5658 if (cur->orig != NULL)
5659 xmlFree(orig);
5660 else
5661 cur->orig = orig;
5662 } else
5663 xmlFree(orig);
5664 }
Owen Taylor3473f882001-02-23 17:55:21 +00005665 if (value != NULL) xmlFree(value);
5666 if (URI != NULL) xmlFree(URI);
5667 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669}
5670
5671/**
5672 * xmlParseDefaultDecl:
5673 * @ctxt: an XML parser context
5674 * @value: Receive a possible fixed default value for the attribute
5675 *
5676 * Parse an attribute default declaration
5677 *
5678 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5679 *
5680 * [ VC: Required Attribute ]
5681 * if the default declaration is the keyword #REQUIRED, then the
5682 * attribute must be specified for all elements of the type in the
5683 * attribute-list declaration.
5684 *
5685 * [ VC: Attribute Default Legal ]
5686 * The declared default value must meet the lexical constraints of
5687 * the declared attribute type c.f. xmlValidateAttributeDecl()
5688 *
5689 * [ VC: Fixed Attribute Default ]
5690 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005691 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005692 *
5693 * [ WFC: No < in Attribute Values ]
5694 * handled in xmlParseAttValue()
5695 *
5696 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005697 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005698 */
5699
5700int
5701xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5702 int val;
5703 xmlChar *ret;
5704
5705 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005706 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005707 SKIP(9);
5708 return(XML_ATTRIBUTE_REQUIRED);
5709 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005710 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005711 SKIP(8);
5712 return(XML_ATTRIBUTE_IMPLIED);
5713 }
5714 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005715 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005716 SKIP(6);
5717 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005718 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005719 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5720 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005721 }
5722 SKIP_BLANKS;
5723 }
5724 ret = xmlParseAttValue(ctxt);
5725 ctxt->instate = XML_PARSER_DTD;
5726 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005727 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005728 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005729 } else
5730 *value = ret;
5731 return(val);
5732}
5733
5734/**
5735 * xmlParseNotationType:
5736 * @ctxt: an XML parser context
5737 *
5738 * parse an Notation attribute type.
5739 *
5740 * Note: the leading 'NOTATION' S part has already being parsed...
5741 *
5742 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5743 *
5744 * [ VC: Notation Attributes ]
5745 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005746 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005747 *
5748 * Returns: the notation attribute tree built while parsing
5749 */
5750
5751xmlEnumerationPtr
5752xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005753 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005754 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005755
5756 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005757 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 return(NULL);
5759 }
5760 SHRINK;
5761 do {
5762 NEXT;
5763 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005764 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005765 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005766 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5767 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005768 xmlFreeEnumeration(ret);
5769 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005770 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005771 tmp = ret;
5772 while (tmp != NULL) {
5773 if (xmlStrEqual(name, tmp->name)) {
5774 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5775 "standalone: attribute notation value token %s duplicated\n",
5776 name, NULL);
5777 if (!xmlDictOwns(ctxt->dict, name))
5778 xmlFree((xmlChar *) name);
5779 break;
5780 }
5781 tmp = tmp->next;
5782 }
5783 if (tmp == NULL) {
5784 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005785 if (cur == NULL) {
5786 xmlFreeEnumeration(ret);
5787 return(NULL);
5788 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005789 if (last == NULL) ret = last = cur;
5790 else {
5791 last->next = cur;
5792 last = cur;
5793 }
Owen Taylor3473f882001-02-23 17:55:21 +00005794 }
5795 SKIP_BLANKS;
5796 } while (RAW == '|');
5797 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005798 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005799 xmlFreeEnumeration(ret);
5800 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005801 }
5802 NEXT;
5803 return(ret);
5804}
5805
5806/**
5807 * xmlParseEnumerationType:
5808 * @ctxt: an XML parser context
5809 *
5810 * parse an Enumeration attribute type.
5811 *
5812 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5813 *
5814 * [ VC: Enumeration ]
5815 * Values of this type must match one of the Nmtoken tokens in
5816 * the declaration
5817 *
5818 * Returns: the enumeration attribute tree built while parsing
5819 */
5820
5821xmlEnumerationPtr
5822xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5823 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005824 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005825
5826 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005827 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005828 return(NULL);
5829 }
5830 SHRINK;
5831 do {
5832 NEXT;
5833 SKIP_BLANKS;
5834 name = xmlParseNmtoken(ctxt);
5835 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005836 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005837 return(ret);
5838 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005839 tmp = ret;
5840 while (tmp != NULL) {
5841 if (xmlStrEqual(name, tmp->name)) {
5842 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5843 "standalone: attribute enumeration value token %s duplicated\n",
5844 name, NULL);
5845 if (!xmlDictOwns(ctxt->dict, name))
5846 xmlFree(name);
5847 break;
5848 }
5849 tmp = tmp->next;
5850 }
5851 if (tmp == NULL) {
5852 cur = xmlCreateEnumeration(name);
5853 if (!xmlDictOwns(ctxt->dict, name))
5854 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005855 if (cur == NULL) {
5856 xmlFreeEnumeration(ret);
5857 return(NULL);
5858 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005859 if (last == NULL) ret = last = cur;
5860 else {
5861 last->next = cur;
5862 last = cur;
5863 }
Owen Taylor3473f882001-02-23 17:55:21 +00005864 }
5865 SKIP_BLANKS;
5866 } while (RAW == '|');
5867 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005868 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005869 return(ret);
5870 }
5871 NEXT;
5872 return(ret);
5873}
5874
5875/**
5876 * xmlParseEnumeratedType:
5877 * @ctxt: an XML parser context
5878 * @tree: the enumeration tree built while parsing
5879 *
5880 * parse an Enumerated attribute type.
5881 *
5882 * [57] EnumeratedType ::= NotationType | Enumeration
5883 *
5884 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5885 *
5886 *
5887 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5888 */
5889
5890int
5891xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005892 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005893 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005894 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005895 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5896 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005897 return(0);
5898 }
5899 SKIP_BLANKS;
5900 *tree = xmlParseNotationType(ctxt);
5901 if (*tree == NULL) return(0);
5902 return(XML_ATTRIBUTE_NOTATION);
5903 }
5904 *tree = xmlParseEnumerationType(ctxt);
5905 if (*tree == NULL) return(0);
5906 return(XML_ATTRIBUTE_ENUMERATION);
5907}
5908
5909/**
5910 * xmlParseAttributeType:
5911 * @ctxt: an XML parser context
5912 * @tree: the enumeration tree built while parsing
5913 *
5914 * parse the Attribute list def for an element
5915 *
5916 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5917 *
5918 * [55] StringType ::= 'CDATA'
5919 *
5920 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5921 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5922 *
5923 * Validity constraints for attribute values syntax are checked in
5924 * xmlValidateAttributeValue()
5925 *
5926 * [ VC: ID ]
5927 * Values of type ID must match the Name production. A name must not
5928 * appear more than once in an XML document as a value of this type;
5929 * i.e., ID values must uniquely identify the elements which bear them.
5930 *
5931 * [ VC: One ID per Element Type ]
5932 * No element type may have more than one ID attribute specified.
5933 *
5934 * [ VC: ID Attribute Default ]
5935 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5936 *
5937 * [ VC: IDREF ]
5938 * Values of type IDREF must match the Name production, and values
5939 * of type IDREFS must match Names; each IDREF Name must match the value
5940 * of an ID attribute on some element in the XML document; i.e. IDREF
5941 * values must match the value of some ID attribute.
5942 *
5943 * [ VC: Entity Name ]
5944 * Values of type ENTITY must match the Name production, values
5945 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005946 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005947 *
5948 * [ VC: Name Token ]
5949 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005950 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005951 *
5952 * Returns the attribute type
5953 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005954int
Owen Taylor3473f882001-02-23 17:55:21 +00005955xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5956 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005957 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005958 SKIP(5);
5959 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005960 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005961 SKIP(6);
5962 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005963 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005964 SKIP(5);
5965 return(XML_ATTRIBUTE_IDREF);
5966 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5967 SKIP(2);
5968 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005969 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005970 SKIP(6);
5971 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005972 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005973 SKIP(8);
5974 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005975 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005976 SKIP(8);
5977 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005978 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005979 SKIP(7);
5980 return(XML_ATTRIBUTE_NMTOKEN);
5981 }
5982 return(xmlParseEnumeratedType(ctxt, tree));
5983}
5984
5985/**
5986 * xmlParseAttributeListDecl:
5987 * @ctxt: an XML parser context
5988 *
5989 * : parse the Attribute list def for an element
5990 *
5991 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5992 *
5993 * [53] AttDef ::= S Name S AttType S DefaultDecl
5994 *
5995 */
5996void
5997xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005998 const xmlChar *elemName;
5999 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00006000 xmlEnumerationPtr tree;
6001
Daniel Veillarda07050d2003-10-19 14:46:32 +00006002 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006003 xmlParserInputPtr input = ctxt->input;
6004
6005 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006006 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006008 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006009 }
6010 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006011 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006012 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006013 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6014 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006015 return;
6016 }
6017 SKIP_BLANKS;
6018 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006019 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006020 const xmlChar *check = CUR_PTR;
6021 int type;
6022 int def;
6023 xmlChar *defaultValue = NULL;
6024
6025 GROW;
6026 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006027 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006028 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006029 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6030 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006031 break;
6032 }
6033 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006034 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006035 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006036 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006037 break;
6038 }
6039 SKIP_BLANKS;
6040
6041 type = xmlParseAttributeType(ctxt, &tree);
6042 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006043 break;
6044 }
6045
6046 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006047 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006048 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6049 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006050 if (tree != NULL)
6051 xmlFreeEnumeration(tree);
6052 break;
6053 }
6054 SKIP_BLANKS;
6055
6056 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6057 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006058 if (defaultValue != NULL)
6059 xmlFree(defaultValue);
6060 if (tree != NULL)
6061 xmlFreeEnumeration(tree);
6062 break;
6063 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006064 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6065 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006066
6067 GROW;
6068 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006069 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006070 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006071 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006072 if (defaultValue != NULL)
6073 xmlFree(defaultValue);
6074 if (tree != NULL)
6075 xmlFreeEnumeration(tree);
6076 break;
6077 }
6078 SKIP_BLANKS;
6079 }
6080 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6082 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006083 if (defaultValue != NULL)
6084 xmlFree(defaultValue);
6085 if (tree != NULL)
6086 xmlFreeEnumeration(tree);
6087 break;
6088 }
6089 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6090 (ctxt->sax->attributeDecl != NULL))
6091 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6092 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006093 else if (tree != NULL)
6094 xmlFreeEnumeration(tree);
6095
6096 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006097 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006098 (def != XML_ATTRIBUTE_REQUIRED)) {
6099 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6100 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006101 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006102 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6103 }
Owen Taylor3473f882001-02-23 17:55:21 +00006104 if (defaultValue != NULL)
6105 xmlFree(defaultValue);
6106 GROW;
6107 }
6108 if (RAW == '>') {
6109 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006110 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6111 "Attribute list declaration doesn't start and stop in the same entity\n",
6112 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 }
6114 NEXT;
6115 }
Owen Taylor3473f882001-02-23 17:55:21 +00006116 }
6117}
6118
6119/**
6120 * xmlParseElementMixedContentDecl:
6121 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006122 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006123 *
6124 * parse the declaration for a Mixed Element content
6125 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006126 *
Owen Taylor3473f882001-02-23 17:55:21 +00006127 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6128 * '(' S? '#PCDATA' S? ')'
6129 *
6130 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6131 *
6132 * [ VC: No Duplicate Types ]
6133 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006134 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006135 *
6136 * returns: the list of the xmlElementContentPtr describing the element choices
6137 */
6138xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006139xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006140 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006141 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006142
6143 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006144 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006145 SKIP(7);
6146 SKIP_BLANKS;
6147 SHRINK;
6148 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006149 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006150 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6151"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006152 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006153 }
Owen Taylor3473f882001-02-23 17:55:21 +00006154 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006155 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006156 if (ret == NULL)
6157 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006158 if (RAW == '*') {
6159 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6160 NEXT;
6161 }
6162 return(ret);
6163 }
6164 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006165 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006166 if (ret == NULL) return(NULL);
6167 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006168 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006169 NEXT;
6170 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006171 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 if (ret == NULL) return(NULL);
6173 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006174 if (cur != NULL)
6175 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006176 cur = ret;
6177 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006178 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006179 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006180 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006181 if (n->c1 != NULL)
6182 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006183 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006184 if (n != NULL)
6185 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006186 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006187 }
6188 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006189 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006190 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006191 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006192 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006193 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006194 return(NULL);
6195 }
6196 SKIP_BLANKS;
6197 GROW;
6198 }
6199 if ((RAW == ')') && (NXT(1) == '*')) {
6200 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006201 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006202 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006203 if (cur->c2 != NULL)
6204 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006205 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006206 if (ret != NULL)
6207 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006208 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006209 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6210"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006211 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006212 }
Owen Taylor3473f882001-02-23 17:55:21 +00006213 SKIP(2);
6214 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006215 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006216 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006217 return(NULL);
6218 }
6219
6220 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006221 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006222 }
6223 return(ret);
6224}
6225
6226/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006227 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006228 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006229 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006230 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006231 *
6232 * parse the declaration for a Mixed Element content
6233 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006234 *
Owen Taylor3473f882001-02-23 17:55:21 +00006235 *
6236 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6237 *
6238 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6239 *
6240 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6241 *
6242 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6243 *
6244 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6245 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006246 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006247 * opening or closing parentheses in a choice, seq, or Mixed
6248 * construct is contained in the replacement text for a parameter
6249 * entity, both must be contained in the same replacement text. For
6250 * interoperability, if a parameter-entity reference appears in a
6251 * choice, seq, or Mixed construct, its replacement text should not
6252 * be empty, and neither the first nor last non-blank character of
6253 * the replacement text should be a connector (| or ,).
6254 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006255 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006256 * hierarchy.
6257 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006258static xmlElementContentPtr
6259xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6260 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006261 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006262 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006263 xmlChar type = 0;
6264
Daniel Veillard489f9672009-08-10 16:49:30 +02006265 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6266 (depth > 2048)) {
6267 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6268"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6269 depth);
6270 return(NULL);
6271 }
Owen Taylor3473f882001-02-23 17:55:21 +00006272 SKIP_BLANKS;
6273 GROW;
6274 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006275 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006276
Owen Taylor3473f882001-02-23 17:55:21 +00006277 /* Recurse on first child */
6278 NEXT;
6279 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006280 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6281 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006282 SKIP_BLANKS;
6283 GROW;
6284 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006285 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006286 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006287 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006288 return(NULL);
6289 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006290 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006291 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006292 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006293 return(NULL);
6294 }
Owen Taylor3473f882001-02-23 17:55:21 +00006295 GROW;
6296 if (RAW == '?') {
6297 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6298 NEXT;
6299 } else if (RAW == '*') {
6300 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6301 NEXT;
6302 } else if (RAW == '+') {
6303 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6304 NEXT;
6305 } else {
6306 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6307 }
Owen Taylor3473f882001-02-23 17:55:21 +00006308 GROW;
6309 }
6310 SKIP_BLANKS;
6311 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006312 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006313 /*
6314 * Each loop we parse one separator and one element.
6315 */
6316 if (RAW == ',') {
6317 if (type == 0) type = CUR;
6318
6319 /*
6320 * Detect "Name | Name , Name" error
6321 */
6322 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006323 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006324 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006325 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006326 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006327 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006328 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006329 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006330 return(NULL);
6331 }
6332 NEXT;
6333
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006334 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006335 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006336 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006337 xmlFreeDocElementContent(ctxt->myDoc, last);
6338 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006339 return(NULL);
6340 }
6341 if (last == NULL) {
6342 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006343 if (ret != NULL)
6344 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006345 ret = cur = op;
6346 } else {
6347 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006348 if (op != NULL)
6349 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006350 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006351 if (last != NULL)
6352 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006353 cur =op;
6354 last = NULL;
6355 }
6356 } else if (RAW == '|') {
6357 if (type == 0) type = CUR;
6358
6359 /*
6360 * Detect "Name , Name | Name" error
6361 */
6362 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006363 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006364 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006365 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006366 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006367 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006368 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006369 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 return(NULL);
6371 }
6372 NEXT;
6373
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006374 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006375 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006376 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006377 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006378 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006379 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006380 return(NULL);
6381 }
6382 if (last == NULL) {
6383 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006384 if (ret != NULL)
6385 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006386 ret = cur = op;
6387 } else {
6388 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006389 if (op != NULL)
6390 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006391 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006392 if (last != NULL)
6393 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006394 cur =op;
6395 last = NULL;
6396 }
6397 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006398 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006399 if ((last != NULL) && (last != ret))
6400 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006401 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006402 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006403 return(NULL);
6404 }
6405 GROW;
6406 SKIP_BLANKS;
6407 GROW;
6408 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006409 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006410 /* Recurse on second child */
6411 NEXT;
6412 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006413 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6414 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006415 SKIP_BLANKS;
6416 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006417 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006418 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006419 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006420 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006421 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 return(NULL);
6423 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006424 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006425 if (last == NULL) {
6426 if (ret != NULL)
6427 xmlFreeDocElementContent(ctxt->myDoc, ret);
6428 return(NULL);
6429 }
Owen Taylor3473f882001-02-23 17:55:21 +00006430 if (RAW == '?') {
6431 last->ocur = XML_ELEMENT_CONTENT_OPT;
6432 NEXT;
6433 } else if (RAW == '*') {
6434 last->ocur = XML_ELEMENT_CONTENT_MULT;
6435 NEXT;
6436 } else if (RAW == '+') {
6437 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6438 NEXT;
6439 } else {
6440 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6441 }
6442 }
6443 SKIP_BLANKS;
6444 GROW;
6445 }
6446 if ((cur != NULL) && (last != NULL)) {
6447 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006448 if (last != NULL)
6449 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006450 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006451 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006452 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6453"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006454 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006455 }
Owen Taylor3473f882001-02-23 17:55:21 +00006456 NEXT;
6457 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006458 if (ret != NULL) {
6459 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6460 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6461 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6462 else
6463 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6464 }
Owen Taylor3473f882001-02-23 17:55:21 +00006465 NEXT;
6466 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006467 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006468 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006469 cur = ret;
6470 /*
6471 * Some normalization:
6472 * (a | b* | c?)* == (a | b | c)*
6473 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006474 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006475 if ((cur->c1 != NULL) &&
6476 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6477 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6478 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6479 if ((cur->c2 != NULL) &&
6480 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6481 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6482 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6483 cur = cur->c2;
6484 }
6485 }
Owen Taylor3473f882001-02-23 17:55:21 +00006486 NEXT;
6487 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006488 if (ret != NULL) {
6489 int found = 0;
6490
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006491 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6492 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6493 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006494 else
6495 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006496 /*
6497 * Some normalization:
6498 * (a | b*)+ == (a | b)*
6499 * (a | b?)+ == (a | b)*
6500 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006501 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006502 if ((cur->c1 != NULL) &&
6503 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6504 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6505 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6506 found = 1;
6507 }
6508 if ((cur->c2 != NULL) &&
6509 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6510 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6511 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6512 found = 1;
6513 }
6514 cur = cur->c2;
6515 }
6516 if (found)
6517 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6518 }
Owen Taylor3473f882001-02-23 17:55:21 +00006519 NEXT;
6520 }
6521 return(ret);
6522}
6523
6524/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006525 * xmlParseElementChildrenContentDecl:
6526 * @ctxt: an XML parser context
6527 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006528 *
6529 * parse the declaration for a Mixed Element content
6530 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6531 *
6532 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6533 *
6534 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6535 *
6536 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6537 *
6538 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6539 *
6540 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6541 * TODO Parameter-entity replacement text must be properly nested
6542 * with parenthesized groups. That is to say, if either of the
6543 * opening or closing parentheses in a choice, seq, or Mixed
6544 * construct is contained in the replacement text for a parameter
6545 * entity, both must be contained in the same replacement text. For
6546 * interoperability, if a parameter-entity reference appears in a
6547 * choice, seq, or Mixed construct, its replacement text should not
6548 * be empty, and neither the first nor last non-blank character of
6549 * the replacement text should be a connector (| or ,).
6550 *
6551 * Returns the tree of xmlElementContentPtr describing the element
6552 * hierarchy.
6553 */
6554xmlElementContentPtr
6555xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6556 /* stub left for API/ABI compat */
6557 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6558}
6559
6560/**
Owen Taylor3473f882001-02-23 17:55:21 +00006561 * xmlParseElementContentDecl:
6562 * @ctxt: an XML parser context
6563 * @name: the name of the element being defined.
6564 * @result: the Element Content pointer will be stored here if any
6565 *
6566 * parse the declaration for an Element content either Mixed or Children,
6567 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006568 *
Owen Taylor3473f882001-02-23 17:55:21 +00006569 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6570 *
6571 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6572 */
6573
6574int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006575xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006576 xmlElementContentPtr *result) {
6577
6578 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006579 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006580 int res;
6581
6582 *result = NULL;
6583
6584 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006585 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006586 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006587 return(-1);
6588 }
6589 NEXT;
6590 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006591 if (ctxt->instate == XML_PARSER_EOF)
6592 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006593 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006594 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006595 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006596 res = XML_ELEMENT_TYPE_MIXED;
6597 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006598 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006599 res = XML_ELEMENT_TYPE_ELEMENT;
6600 }
Owen Taylor3473f882001-02-23 17:55:21 +00006601 SKIP_BLANKS;
6602 *result = tree;
6603 return(res);
6604}
6605
6606/**
6607 * xmlParseElementDecl:
6608 * @ctxt: an XML parser context
6609 *
6610 * parse an Element declaration.
6611 *
6612 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6613 *
6614 * [ VC: Unique Element Type Declaration ]
6615 * No element type may be declared more than once
6616 *
6617 * Returns the type of the element, or -1 in case of error
6618 */
6619int
6620xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006621 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006622 int ret = -1;
6623 xmlElementContentPtr content = NULL;
6624
Daniel Veillard4c778d82005-01-23 17:37:44 +00006625 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006626 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006627 xmlParserInputPtr input = ctxt->input;
6628
6629 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006630 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006631 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6632 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006633 }
6634 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006635 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006636 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006637 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6638 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006639 return(-1);
6640 }
6641 while ((RAW == 0) && (ctxt->inputNr > 1))
6642 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006643 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006644 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6645 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006646 }
6647 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006648 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006649 SKIP(5);
6650 /*
6651 * Element must always be empty.
6652 */
6653 ret = XML_ELEMENT_TYPE_EMPTY;
6654 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6655 (NXT(2) == 'Y')) {
6656 SKIP(3);
6657 /*
6658 * Element is a generic container.
6659 */
6660 ret = XML_ELEMENT_TYPE_ANY;
6661 } else if (RAW == '(') {
6662 ret = xmlParseElementContentDecl(ctxt, name, &content);
6663 } else {
6664 /*
6665 * [ WFC: PEs in Internal Subset ] error handling.
6666 */
6667 if ((RAW == '%') && (ctxt->external == 0) &&
6668 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006669 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006670 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006671 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006672 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006673 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6674 }
Owen Taylor3473f882001-02-23 17:55:21 +00006675 return(-1);
6676 }
6677
6678 SKIP_BLANKS;
6679 /*
6680 * Pop-up of finished entities.
6681 */
6682 while ((RAW == 0) && (ctxt->inputNr > 1))
6683 xmlPopInput(ctxt);
6684 SKIP_BLANKS;
6685
6686 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006687 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006688 if (content != NULL) {
6689 xmlFreeDocElementContent(ctxt->myDoc, content);
6690 }
Owen Taylor3473f882001-02-23 17:55:21 +00006691 } else {
6692 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006693 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6694 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006695 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006696
Owen Taylor3473f882001-02-23 17:55:21 +00006697 NEXT;
6698 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006699 (ctxt->sax->elementDecl != NULL)) {
6700 if (content != NULL)
6701 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006702 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6703 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006704 if ((content != NULL) && (content->parent == NULL)) {
6705 /*
6706 * this is a trick: if xmlAddElementDecl is called,
6707 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006708 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006709 * interfaces or change the API/ABI
6710 */
6711 xmlFreeDocElementContent(ctxt->myDoc, content);
6712 }
6713 } else if (content != NULL) {
6714 xmlFreeDocElementContent(ctxt->myDoc, content);
6715 }
Owen Taylor3473f882001-02-23 17:55:21 +00006716 }
Owen Taylor3473f882001-02-23 17:55:21 +00006717 }
6718 return(ret);
6719}
6720
6721/**
Owen Taylor3473f882001-02-23 17:55:21 +00006722 * xmlParseConditionalSections
6723 * @ctxt: an XML parser context
6724 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006725 * [61] conditionalSect ::= includeSect | ignoreSect
6726 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006727 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6728 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6729 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6730 */
6731
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006732static void
Owen Taylor3473f882001-02-23 17:55:21 +00006733xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006734 int id = ctxt->input->id;
6735
Owen Taylor3473f882001-02-23 17:55:21 +00006736 SKIP(3);
6737 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006738 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006739 SKIP(7);
6740 SKIP_BLANKS;
6741 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006742 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006743 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006744 if (ctxt->input->id != id) {
6745 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6746 "All markup of the conditional section is not in the same entity\n",
6747 NULL, NULL);
6748 }
Owen Taylor3473f882001-02-23 17:55:21 +00006749 NEXT;
6750 }
6751 if (xmlParserDebugEntities) {
6752 if ((ctxt->input != NULL) && (ctxt->input->filename))
6753 xmlGenericError(xmlGenericErrorContext,
6754 "%s(%d): ", ctxt->input->filename,
6755 ctxt->input->line);
6756 xmlGenericError(xmlGenericErrorContext,
6757 "Entering INCLUDE Conditional Section\n");
6758 }
6759
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006760 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6761 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006762 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006763 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006764
6765 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6766 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006767 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006768 NEXT;
6769 } else if (RAW == '%') {
6770 xmlParsePEReference(ctxt);
6771 } else
6772 xmlParseMarkupDecl(ctxt);
6773
6774 /*
6775 * Pop-up of finished entities.
6776 */
6777 while ((RAW == 0) && (ctxt->inputNr > 1))
6778 xmlPopInput(ctxt);
6779
Daniel Veillardfdc91562002-07-01 21:52:03 +00006780 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006781 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006782 break;
6783 }
6784 }
6785 if (xmlParserDebugEntities) {
6786 if ((ctxt->input != NULL) && (ctxt->input->filename))
6787 xmlGenericError(xmlGenericErrorContext,
6788 "%s(%d): ", ctxt->input->filename,
6789 ctxt->input->line);
6790 xmlGenericError(xmlGenericErrorContext,
6791 "Leaving INCLUDE Conditional Section\n");
6792 }
6793
Daniel Veillarda07050d2003-10-19 14:46:32 +00006794 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006795 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006796 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006797 int depth = 0;
6798
6799 SKIP(6);
6800 SKIP_BLANKS;
6801 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006802 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006803 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006804 if (ctxt->input->id != id) {
6805 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6806 "All markup of the conditional section is not in the same entity\n",
6807 NULL, NULL);
6808 }
Owen Taylor3473f882001-02-23 17:55:21 +00006809 NEXT;
6810 }
6811 if (xmlParserDebugEntities) {
6812 if ((ctxt->input != NULL) && (ctxt->input->filename))
6813 xmlGenericError(xmlGenericErrorContext,
6814 "%s(%d): ", ctxt->input->filename,
6815 ctxt->input->line);
6816 xmlGenericError(xmlGenericErrorContext,
6817 "Entering IGNORE Conditional Section\n");
6818 }
6819
6820 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006821 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006822 * But disable SAX event generating DTD building in the meantime
6823 */
6824 state = ctxt->disableSAX;
6825 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006826 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006827 ctxt->instate = XML_PARSER_IGNORE;
6828
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006829 while (((depth >= 0) && (RAW != 0)) &&
6830 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006831 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6832 depth++;
6833 SKIP(3);
6834 continue;
6835 }
6836 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6837 if (--depth >= 0) SKIP(3);
6838 continue;
6839 }
6840 NEXT;
6841 continue;
6842 }
6843
6844 ctxt->disableSAX = state;
6845 ctxt->instate = instate;
6846
6847 if (xmlParserDebugEntities) {
6848 if ((ctxt->input != NULL) && (ctxt->input->filename))
6849 xmlGenericError(xmlGenericErrorContext,
6850 "%s(%d): ", ctxt->input->filename,
6851 ctxt->input->line);
6852 xmlGenericError(xmlGenericErrorContext,
6853 "Leaving IGNORE Conditional Section\n");
6854 }
6855
6856 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006857 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006858 }
6859
6860 if (RAW == 0)
6861 SHRINK;
6862
6863 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006864 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006865 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006866 if (ctxt->input->id != id) {
6867 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6868 "All markup of the conditional section is not in the same entity\n",
6869 NULL, NULL);
6870 }
Owen Taylor3473f882001-02-23 17:55:21 +00006871 SKIP(3);
6872 }
6873}
6874
6875/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006876 * xmlParseMarkupDecl:
6877 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006878 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006879 * parse Markup declarations
6880 *
6881 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6882 * NotationDecl | PI | Comment
6883 *
6884 * [ VC: Proper Declaration/PE Nesting ]
6885 * Parameter-entity replacement text must be properly nested with
6886 * markup declarations. That is to say, if either the first character
6887 * or the last character of a markup declaration (markupdecl above) is
6888 * contained in the replacement text for a parameter-entity reference,
6889 * both must be contained in the same replacement text.
6890 *
6891 * [ WFC: PEs in Internal Subset ]
6892 * In the internal DTD subset, parameter-entity references can occur
6893 * only where markup declarations can occur, not within markup declarations.
6894 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006895 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006896 */
6897void
6898xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6899 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006900 if (CUR == '<') {
6901 if (NXT(1) == '!') {
6902 switch (NXT(2)) {
6903 case 'E':
6904 if (NXT(3) == 'L')
6905 xmlParseElementDecl(ctxt);
6906 else if (NXT(3) == 'N')
6907 xmlParseEntityDecl(ctxt);
6908 break;
6909 case 'A':
6910 xmlParseAttributeListDecl(ctxt);
6911 break;
6912 case 'N':
6913 xmlParseNotationDecl(ctxt);
6914 break;
6915 case '-':
6916 xmlParseComment(ctxt);
6917 break;
6918 default:
6919 /* there is an error but it will be detected later */
6920 break;
6921 }
6922 } else if (NXT(1) == '?') {
6923 xmlParsePI(ctxt);
6924 }
6925 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006926 /*
6927 * This is only for internal subset. On external entities,
6928 * the replacement is done before parsing stage
6929 */
6930 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6931 xmlParsePEReference(ctxt);
6932
6933 /*
6934 * Conditional sections are allowed from entities included
6935 * by PE References in the internal subset.
6936 */
6937 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6938 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6939 xmlParseConditionalSections(ctxt);
6940 }
6941 }
6942
6943 ctxt->instate = XML_PARSER_DTD;
6944}
6945
6946/**
6947 * xmlParseTextDecl:
6948 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006949 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006950 * parse an XML declaration header for external entities
6951 *
6952 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006953 */
6954
6955void
6956xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6957 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006958 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006959
6960 /*
6961 * We know that '<?xml' is here.
6962 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006963 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006964 SKIP(5);
6965 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006966 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006967 return;
6968 }
6969
William M. Brack76e95df2003-10-18 16:20:14 +00006970 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006971 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6972 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006973 }
6974 SKIP_BLANKS;
6975
6976 /*
6977 * We may have the VersionInfo here.
6978 */
6979 version = xmlParseVersionInfo(ctxt);
6980 if (version == NULL)
6981 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006982 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006983 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006984 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6985 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006986 }
6987 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006988 ctxt->input->version = version;
6989
6990 /*
6991 * We must have the encoding declaration
6992 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006993 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006994 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6995 /*
6996 * The XML REC instructs us to stop parsing right here
6997 */
6998 return;
6999 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007000 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7001 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7002 "Missing encoding in text declaration\n");
7003 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007004
7005 SKIP_BLANKS;
7006 if ((RAW == '?') && (NXT(1) == '>')) {
7007 SKIP(2);
7008 } else if (RAW == '>') {
7009 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007010 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007011 NEXT;
7012 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007013 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007014 MOVETO_ENDTAG(CUR_PTR);
7015 NEXT;
7016 }
7017}
7018
7019/**
Owen Taylor3473f882001-02-23 17:55:21 +00007020 * xmlParseExternalSubset:
7021 * @ctxt: an XML parser context
7022 * @ExternalID: the external identifier
7023 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007024 *
Owen Taylor3473f882001-02-23 17:55:21 +00007025 * parse Markup declarations from an external subset
7026 *
7027 * [30] extSubset ::= textDecl? extSubsetDecl
7028 *
7029 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7030 */
7031void
7032xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7033 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007034 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007035 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007036
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007037 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007038 (ctxt->input->end - ctxt->input->cur >= 4)) {
7039 xmlChar start[4];
7040 xmlCharEncoding enc;
7041
7042 start[0] = RAW;
7043 start[1] = NXT(1);
7044 start[2] = NXT(2);
7045 start[3] = NXT(3);
7046 enc = xmlDetectCharEncoding(start, 4);
7047 if (enc != XML_CHAR_ENCODING_NONE)
7048 xmlSwitchEncoding(ctxt, enc);
7049 }
7050
Daniel Veillarda07050d2003-10-19 14:46:32 +00007051 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007052 xmlParseTextDecl(ctxt);
7053 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7054 /*
7055 * The XML REC instructs us to stop parsing right here
7056 */
7057 ctxt->instate = XML_PARSER_EOF;
7058 return;
7059 }
7060 }
7061 if (ctxt->myDoc == NULL) {
7062 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007063 if (ctxt->myDoc == NULL) {
7064 xmlErrMemory(ctxt, "New Doc failed");
7065 return;
7066 }
7067 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007068 }
7069 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7070 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7071
7072 ctxt->instate = XML_PARSER_DTD;
7073 ctxt->external = 1;
7074 while (((RAW == '<') && (NXT(1) == '?')) ||
7075 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007076 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007077 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007078 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007079
7080 GROW;
7081 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7082 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007083 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007084 NEXT;
7085 } else if (RAW == '%') {
7086 xmlParsePEReference(ctxt);
7087 } else
7088 xmlParseMarkupDecl(ctxt);
7089
7090 /*
7091 * Pop-up of finished entities.
7092 */
7093 while ((RAW == 0) && (ctxt->inputNr > 1))
7094 xmlPopInput(ctxt);
7095
Daniel Veillardfdc91562002-07-01 21:52:03 +00007096 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007097 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007098 break;
7099 }
7100 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007101
Owen Taylor3473f882001-02-23 17:55:21 +00007102 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007103 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007104 }
7105
7106}
7107
7108/**
7109 * xmlParseReference:
7110 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007111 *
Owen Taylor3473f882001-02-23 17:55:21 +00007112 * parse and handle entity references in content, depending on the SAX
7113 * interface, this may end-up in a call to character() if this is a
7114 * CharRef, a predefined entity, if there is no reference() callback.
7115 * or if the parser was asked to switch to that mode.
7116 *
7117 * [67] Reference ::= EntityRef | CharRef
7118 */
7119void
7120xmlParseReference(xmlParserCtxtPtr ctxt) {
7121 xmlEntityPtr ent;
7122 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007123 int was_checked;
7124 xmlNodePtr list = NULL;
7125 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007126
Daniel Veillard0161e632008-08-28 15:36:32 +00007127
7128 if (RAW != '&')
7129 return;
7130
7131 /*
7132 * Simple case of a CharRef
7133 */
Owen Taylor3473f882001-02-23 17:55:21 +00007134 if (NXT(1) == '#') {
7135 int i = 0;
7136 xmlChar out[10];
7137 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007138 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007139
Daniel Veillarddc171602008-03-26 17:41:38 +00007140 if (value == 0)
7141 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007142 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7143 /*
7144 * So we are using non-UTF-8 buffers
7145 * Check that the char fit on 8bits, if not
7146 * generate a CharRef.
7147 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007148 if (value <= 0xFF) {
7149 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007150 out[1] = 0;
7151 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7152 (!ctxt->disableSAX))
7153 ctxt->sax->characters(ctxt->userData, out, 1);
7154 } else {
7155 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007156 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007157 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007158 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007159 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7160 (!ctxt->disableSAX))
7161 ctxt->sax->reference(ctxt->userData, out);
7162 }
7163 } else {
7164 /*
7165 * Just encode the value in UTF-8
7166 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007167 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007168 out[i] = 0;
7169 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170 (!ctxt->disableSAX))
7171 ctxt->sax->characters(ctxt->userData, out, i);
7172 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007173 return;
7174 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007175
Daniel Veillard0161e632008-08-28 15:36:32 +00007176 /*
7177 * We are seeing an entity reference
7178 */
7179 ent = xmlParseEntityRef(ctxt);
7180 if (ent == NULL) return;
7181 if (!ctxt->wellFormed)
7182 return;
7183 was_checked = ent->checked;
7184
7185 /* special case of predefined entities */
7186 if ((ent->name == NULL) ||
7187 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7188 val = ent->content;
7189 if (val == NULL) return;
7190 /*
7191 * inline the entity.
7192 */
7193 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7194 (!ctxt->disableSAX))
7195 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7196 return;
7197 }
7198
7199 /*
7200 * The first reference to the entity trigger a parsing phase
7201 * where the ent->children is filled with the result from
7202 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007203 * Note: external parsed entities will not be loaded, it is not
7204 * required for a non-validating parser, unless the parsing option
7205 * of validating, or substituting entities were given. Doing so is
7206 * far more secure as the parser will only process data coming from
7207 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007208 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007209 if ((ent->checked == 0) &&
7210 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7211 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007212 unsigned long oldnbent = ctxt->nbentities;
7213
7214 /*
7215 * This is a bit hackish but this seems the best
7216 * way to make sure both SAX and DOM entity support
7217 * behaves okay.
7218 */
7219 void *user_data;
7220 if (ctxt->userData == ctxt)
7221 user_data = NULL;
7222 else
7223 user_data = ctxt->userData;
7224
7225 /*
7226 * Check that this entity is well formed
7227 * 4.3.2: An internal general parsed entity is well-formed
7228 * if its replacement text matches the production labeled
7229 * content.
7230 */
7231 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7232 ctxt->depth++;
7233 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7234 user_data, &list);
7235 ctxt->depth--;
7236
7237 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7238 ctxt->depth++;
7239 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7240 user_data, ctxt->depth, ent->URI,
7241 ent->ExternalID, &list);
7242 ctxt->depth--;
7243 } else {
7244 ret = XML_ERR_ENTITY_PE_INTERNAL;
7245 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7246 "invalid entity type found\n", NULL);
7247 }
7248
7249 /*
7250 * Store the number of entities needing parsing for this entity
7251 * content and do checkings
7252 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007253 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7254 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7255 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007256 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007257 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007258 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007259 return;
7260 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007261 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007262 xmlFreeNodeList(list);
7263 return;
7264 }
Owen Taylor3473f882001-02-23 17:55:21 +00007265
Daniel Veillard0161e632008-08-28 15:36:32 +00007266 if ((ret == XML_ERR_OK) && (list != NULL)) {
7267 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7268 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7269 (ent->children == NULL)) {
7270 ent->children = list;
7271 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007272 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007273 * Prune it directly in the generated document
7274 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007275 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007276 if (((list->type == XML_TEXT_NODE) &&
7277 (list->next == NULL)) ||
7278 (ctxt->parseMode == XML_PARSE_READER)) {
7279 list->parent = (xmlNodePtr) ent;
7280 list = NULL;
7281 ent->owner = 1;
7282 } else {
7283 ent->owner = 0;
7284 while (list != NULL) {
7285 list->parent = (xmlNodePtr) ctxt->node;
7286 list->doc = ctxt->myDoc;
7287 if (list->next == NULL)
7288 ent->last = list;
7289 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007290 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007291 list = ent->children;
7292#ifdef LIBXML_LEGACY_ENABLED
7293 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7294 xmlAddEntityReference(ent, list, NULL);
7295#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007296 }
7297 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007298 ent->owner = 1;
7299 while (list != NULL) {
7300 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007301 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007302 if (list->next == NULL)
7303 ent->last = list;
7304 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007305 }
7306 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007307 } else {
7308 xmlFreeNodeList(list);
7309 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007310 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007311 } else if ((ret != XML_ERR_OK) &&
7312 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7313 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7314 "Entity '%s' failed to parse\n", ent->name);
7315 } else if (list != NULL) {
7316 xmlFreeNodeList(list);
7317 list = NULL;
7318 }
7319 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007320 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007321 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007322 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007323 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007324
Daniel Veillard0161e632008-08-28 15:36:32 +00007325 /*
7326 * Now that the entity content has been gathered
7327 * provide it to the application, this can take different forms based
7328 * on the parsing modes.
7329 */
7330 if (ent->children == NULL) {
7331 /*
7332 * Probably running in SAX mode and the callbacks don't
7333 * build the entity content. So unless we already went
7334 * though parsing for first checking go though the entity
7335 * content to generate callbacks associated to the entity
7336 */
7337 if (was_checked != 0) {
7338 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007339 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007340 * This is a bit hackish but this seems the best
7341 * way to make sure both SAX and DOM entity support
7342 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007343 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007344 if (ctxt->userData == ctxt)
7345 user_data = NULL;
7346 else
7347 user_data = ctxt->userData;
7348
7349 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7350 ctxt->depth++;
7351 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7352 ent->content, user_data, NULL);
7353 ctxt->depth--;
7354 } else if (ent->etype ==
7355 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7356 ctxt->depth++;
7357 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7358 ctxt->sax, user_data, ctxt->depth,
7359 ent->URI, ent->ExternalID, NULL);
7360 ctxt->depth--;
7361 } else {
7362 ret = XML_ERR_ENTITY_PE_INTERNAL;
7363 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7364 "invalid entity type found\n", NULL);
7365 }
7366 if (ret == XML_ERR_ENTITY_LOOP) {
7367 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7368 return;
7369 }
7370 }
7371 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7372 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7373 /*
7374 * Entity reference callback comes second, it's somewhat
7375 * superfluous but a compatibility to historical behaviour
7376 */
7377 ctxt->sax->reference(ctxt->userData, ent->name);
7378 }
7379 return;
7380 }
7381
7382 /*
7383 * If we didn't get any children for the entity being built
7384 */
7385 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7386 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7387 /*
7388 * Create a node.
7389 */
7390 ctxt->sax->reference(ctxt->userData, ent->name);
7391 return;
7392 }
7393
7394 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7395 /*
7396 * There is a problem on the handling of _private for entities
7397 * (bug 155816): Should we copy the content of the field from
7398 * the entity (possibly overwriting some value set by the user
7399 * when a copy is created), should we leave it alone, or should
7400 * we try to take care of different situations? The problem
7401 * is exacerbated by the usage of this field by the xmlReader.
7402 * To fix this bug, we look at _private on the created node
7403 * and, if it's NULL, we copy in whatever was in the entity.
7404 * If it's not NULL we leave it alone. This is somewhat of a
7405 * hack - maybe we should have further tests to determine
7406 * what to do.
7407 */
7408 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7409 /*
7410 * Seems we are generating the DOM content, do
7411 * a simple tree copy for all references except the first
7412 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007413 */
7414 if (((list == NULL) && (ent->owner == 0)) ||
7415 (ctxt->parseMode == XML_PARSE_READER)) {
7416 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7417
7418 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007419 * We are copying here, make sure there is no abuse
7420 */
7421 ctxt->sizeentcopy += ent->length;
7422 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7423 return;
7424
7425 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007426 * when operating on a reader, the entities definitions
7427 * are always owning the entities subtree.
7428 if (ctxt->parseMode == XML_PARSE_READER)
7429 ent->owner = 1;
7430 */
7431
7432 cur = ent->children;
7433 while (cur != NULL) {
7434 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7435 if (nw != NULL) {
7436 if (nw->_private == NULL)
7437 nw->_private = cur->_private;
7438 if (firstChild == NULL){
7439 firstChild = nw;
7440 }
7441 nw = xmlAddChild(ctxt->node, nw);
7442 }
7443 if (cur == ent->last) {
7444 /*
7445 * needed to detect some strange empty
7446 * node cases in the reader tests
7447 */
7448 if ((ctxt->parseMode == XML_PARSE_READER) &&
7449 (nw != NULL) &&
7450 (nw->type == XML_ELEMENT_NODE) &&
7451 (nw->children == NULL))
7452 nw->extra = 1;
7453
7454 break;
7455 }
7456 cur = cur->next;
7457 }
7458#ifdef LIBXML_LEGACY_ENABLED
7459 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7460 xmlAddEntityReference(ent, firstChild, nw);
7461#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007462 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007463 xmlNodePtr nw = NULL, cur, next, last,
7464 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007465
7466 /*
7467 * We are copying here, make sure there is no abuse
7468 */
7469 ctxt->sizeentcopy += ent->length;
7470 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7471 return;
7472
Daniel Veillard0161e632008-08-28 15:36:32 +00007473 /*
7474 * Copy the entity child list and make it the new
7475 * entity child list. The goal is to make sure any
7476 * ID or REF referenced will be the one from the
7477 * document content and not the entity copy.
7478 */
7479 cur = ent->children;
7480 ent->children = NULL;
7481 last = ent->last;
7482 ent->last = NULL;
7483 while (cur != NULL) {
7484 next = cur->next;
7485 cur->next = NULL;
7486 cur->parent = NULL;
7487 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7488 if (nw != NULL) {
7489 if (nw->_private == NULL)
7490 nw->_private = cur->_private;
7491 if (firstChild == NULL){
7492 firstChild = cur;
7493 }
7494 xmlAddChild((xmlNodePtr) ent, nw);
7495 xmlAddChild(ctxt->node, cur);
7496 }
7497 if (cur == last)
7498 break;
7499 cur = next;
7500 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007501 if (ent->owner == 0)
7502 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007503#ifdef LIBXML_LEGACY_ENABLED
7504 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7505 xmlAddEntityReference(ent, firstChild, nw);
7506#endif /* LIBXML_LEGACY_ENABLED */
7507 } else {
7508 const xmlChar *nbktext;
7509
7510 /*
7511 * the name change is to avoid coalescing of the
7512 * node with a possible previous text one which
7513 * would make ent->children a dangling pointer
7514 */
7515 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7516 -1);
7517 if (ent->children->type == XML_TEXT_NODE)
7518 ent->children->name = nbktext;
7519 if ((ent->last != ent->children) &&
7520 (ent->last->type == XML_TEXT_NODE))
7521 ent->last->name = nbktext;
7522 xmlAddChildList(ctxt->node, ent->children);
7523 }
7524
7525 /*
7526 * This is to avoid a nasty side effect, see
7527 * characters() in SAX.c
7528 */
7529 ctxt->nodemem = 0;
7530 ctxt->nodelen = 0;
7531 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007532 }
7533 }
7534}
7535
7536/**
7537 * xmlParseEntityRef:
7538 * @ctxt: an XML parser context
7539 *
7540 * parse ENTITY references declarations
7541 *
7542 * [68] EntityRef ::= '&' Name ';'
7543 *
7544 * [ WFC: Entity Declared ]
7545 * In a document without any DTD, a document with only an internal DTD
7546 * subset which contains no parameter entity references, or a document
7547 * with "standalone='yes'", the Name given in the entity reference
7548 * must match that in an entity declaration, except that well-formed
7549 * documents need not declare any of the following entities: amp, lt,
7550 * gt, apos, quot. The declaration of a parameter entity must precede
7551 * any reference to it. Similarly, the declaration of a general entity
7552 * must precede any reference to it which appears in a default value in an
7553 * attribute-list declaration. Note that if entities are declared in the
7554 * external subset or in external parameter entities, a non-validating
7555 * processor is not obligated to read and process their declarations;
7556 * for such documents, the rule that an entity must be declared is a
7557 * well-formedness constraint only if standalone='yes'.
7558 *
7559 * [ WFC: Parsed Entity ]
7560 * An entity reference must not contain the name of an unparsed entity
7561 *
7562 * Returns the xmlEntityPtr if found, or NULL otherwise.
7563 */
7564xmlEntityPtr
7565xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007566 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007567 xmlEntityPtr ent = NULL;
7568
7569 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007570 if (ctxt->instate == XML_PARSER_EOF)
7571 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007572
Daniel Veillard0161e632008-08-28 15:36:32 +00007573 if (RAW != '&')
7574 return(NULL);
7575 NEXT;
7576 name = xmlParseName(ctxt);
7577 if (name == NULL) {
7578 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7579 "xmlParseEntityRef: no name\n");
7580 return(NULL);
7581 }
7582 if (RAW != ';') {
7583 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7584 return(NULL);
7585 }
7586 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007587
Daniel Veillard0161e632008-08-28 15:36:32 +00007588 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007589 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007590 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007591 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7592 ent = xmlGetPredefinedEntity(name);
7593 if (ent != NULL)
7594 return(ent);
7595 }
Owen Taylor3473f882001-02-23 17:55:21 +00007596
Daniel Veillard0161e632008-08-28 15:36:32 +00007597 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007598 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007599 */
7600 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007601
Daniel Veillard0161e632008-08-28 15:36:32 +00007602 /*
7603 * Ask first SAX for entity resolution, otherwise try the
7604 * entities which may have stored in the parser context.
7605 */
7606 if (ctxt->sax != NULL) {
7607 if (ctxt->sax->getEntity != NULL)
7608 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007609 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007610 (ctxt->options & XML_PARSE_OLDSAX))
7611 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007612 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7613 (ctxt->userData==ctxt)) {
7614 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007615 }
7616 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007617 if (ctxt->instate == XML_PARSER_EOF)
7618 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007619 /*
7620 * [ WFC: Entity Declared ]
7621 * In a document without any DTD, a document with only an
7622 * internal DTD subset which contains no parameter entity
7623 * references, or a document with "standalone='yes'", the
7624 * Name given in the entity reference must match that in an
7625 * entity declaration, except that well-formed documents
7626 * need not declare any of the following entities: amp, lt,
7627 * gt, apos, quot.
7628 * The declaration of a parameter entity must precede any
7629 * reference to it.
7630 * Similarly, the declaration of a general entity must
7631 * precede any reference to it which appears in a default
7632 * value in an attribute-list declaration. Note that if
7633 * entities are declared in the external subset or in
7634 * external parameter entities, a non-validating processor
7635 * is not obligated to read and process their declarations;
7636 * for such documents, the rule that an entity must be
7637 * declared is a well-formedness constraint only if
7638 * standalone='yes'.
7639 */
7640 if (ent == NULL) {
7641 if ((ctxt->standalone == 1) ||
7642 ((ctxt->hasExternalSubset == 0) &&
7643 (ctxt->hasPErefs == 0))) {
7644 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7645 "Entity '%s' not defined\n", name);
7646 } else {
7647 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7648 "Entity '%s' not defined\n", name);
7649 if ((ctxt->inSubset == 0) &&
7650 (ctxt->sax != NULL) &&
7651 (ctxt->sax->reference != NULL)) {
7652 ctxt->sax->reference(ctxt->userData, name);
7653 }
7654 }
7655 ctxt->valid = 0;
7656 }
7657
7658 /*
7659 * [ WFC: Parsed Entity ]
7660 * An entity reference must not contain the name of an
7661 * unparsed entity
7662 */
7663 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7664 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7665 "Entity reference to unparsed entity %s\n", name);
7666 }
7667
7668 /*
7669 * [ WFC: No External Entity References ]
7670 * Attribute values cannot contain direct or indirect
7671 * entity references to external entities.
7672 */
7673 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7674 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7675 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7676 "Attribute references external entity '%s'\n", name);
7677 }
7678 /*
7679 * [ WFC: No < in Attribute Values ]
7680 * The replacement text of any entity referred to directly or
7681 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007682 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007683 */
7684 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007685 (ent != NULL) &&
7686 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7687 if ((ent->checked & 1) || ((ent->checked == 0) &&
7688 (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7689 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7690 "'<' in entity '%s' is not allowed in attributes values\n", name);
7691 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007692 }
7693
7694 /*
7695 * Internal check, no parameter entities here ...
7696 */
7697 else {
7698 switch (ent->etype) {
7699 case XML_INTERNAL_PARAMETER_ENTITY:
7700 case XML_EXTERNAL_PARAMETER_ENTITY:
7701 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7702 "Attempt to reference the parameter entity '%s'\n",
7703 name);
7704 break;
7705 default:
7706 break;
7707 }
7708 }
7709
7710 /*
7711 * [ WFC: No Recursion ]
7712 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007713 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007714 * Done somewhere else
7715 */
Owen Taylor3473f882001-02-23 17:55:21 +00007716 return(ent);
7717}
7718
7719/**
7720 * xmlParseStringEntityRef:
7721 * @ctxt: an XML parser context
7722 * @str: a pointer to an index in the string
7723 *
7724 * parse ENTITY references declarations, but this version parses it from
7725 * a string value.
7726 *
7727 * [68] EntityRef ::= '&' Name ';'
7728 *
7729 * [ WFC: Entity Declared ]
7730 * In a document without any DTD, a document with only an internal DTD
7731 * subset which contains no parameter entity references, or a document
7732 * with "standalone='yes'", the Name given in the entity reference
7733 * must match that in an entity declaration, except that well-formed
7734 * documents need not declare any of the following entities: amp, lt,
7735 * gt, apos, quot. The declaration of a parameter entity must precede
7736 * any reference to it. Similarly, the declaration of a general entity
7737 * must precede any reference to it which appears in a default value in an
7738 * attribute-list declaration. Note that if entities are declared in the
7739 * external subset or in external parameter entities, a non-validating
7740 * processor is not obligated to read and process their declarations;
7741 * for such documents, the rule that an entity must be declared is a
7742 * well-formedness constraint only if standalone='yes'.
7743 *
7744 * [ WFC: Parsed Entity ]
7745 * An entity reference must not contain the name of an unparsed entity
7746 *
7747 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7748 * is updated to the current location in the string.
7749 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007750static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007751xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7752 xmlChar *name;
7753 const xmlChar *ptr;
7754 xmlChar cur;
7755 xmlEntityPtr ent = NULL;
7756
7757 if ((str == NULL) || (*str == NULL))
7758 return(NULL);
7759 ptr = *str;
7760 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007761 if (cur != '&')
7762 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007763
Daniel Veillard0161e632008-08-28 15:36:32 +00007764 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007765 name = xmlParseStringName(ctxt, &ptr);
7766 if (name == NULL) {
7767 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7768 "xmlParseStringEntityRef: no name\n");
7769 *str = ptr;
7770 return(NULL);
7771 }
7772 if (*ptr != ';') {
7773 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007774 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007775 *str = ptr;
7776 return(NULL);
7777 }
7778 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007779
Owen Taylor3473f882001-02-23 17:55:21 +00007780
Daniel Veillard0161e632008-08-28 15:36:32 +00007781 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007782 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007783 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007784 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7785 ent = xmlGetPredefinedEntity(name);
7786 if (ent != NULL) {
7787 xmlFree(name);
7788 *str = ptr;
7789 return(ent);
7790 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007791 }
Owen Taylor3473f882001-02-23 17:55:21 +00007792
Daniel Veillard0161e632008-08-28 15:36:32 +00007793 /*
7794 * Increate the number of entity references parsed
7795 */
7796 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007797
Daniel Veillard0161e632008-08-28 15:36:32 +00007798 /*
7799 * Ask first SAX for entity resolution, otherwise try the
7800 * entities which may have stored in the parser context.
7801 */
7802 if (ctxt->sax != NULL) {
7803 if (ctxt->sax->getEntity != NULL)
7804 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007805 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7806 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007807 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7808 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007809 }
7810 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007811 if (ctxt->instate == XML_PARSER_EOF) {
7812 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007813 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007814 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007815
7816 /*
7817 * [ WFC: Entity Declared ]
7818 * In a document without any DTD, a document with only an
7819 * internal DTD subset which contains no parameter entity
7820 * references, or a document with "standalone='yes'", the
7821 * Name given in the entity reference must match that in an
7822 * entity declaration, except that well-formed documents
7823 * need not declare any of the following entities: amp, lt,
7824 * gt, apos, quot.
7825 * The declaration of a parameter entity must precede any
7826 * reference to it.
7827 * Similarly, the declaration of a general entity must
7828 * precede any reference to it which appears in a default
7829 * value in an attribute-list declaration. Note that if
7830 * entities are declared in the external subset or in
7831 * external parameter entities, a non-validating processor
7832 * is not obligated to read and process their declarations;
7833 * for such documents, the rule that an entity must be
7834 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007835 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007836 */
7837 if (ent == NULL) {
7838 if ((ctxt->standalone == 1) ||
7839 ((ctxt->hasExternalSubset == 0) &&
7840 (ctxt->hasPErefs == 0))) {
7841 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7842 "Entity '%s' not defined\n", name);
7843 } else {
7844 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7845 "Entity '%s' not defined\n",
7846 name);
7847 }
7848 /* TODO ? check regressions ctxt->valid = 0; */
7849 }
7850
7851 /*
7852 * [ WFC: Parsed Entity ]
7853 * An entity reference must not contain the name of an
7854 * unparsed entity
7855 */
7856 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7857 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7858 "Entity reference to unparsed entity %s\n", name);
7859 }
7860
7861 /*
7862 * [ WFC: No External Entity References ]
7863 * Attribute values cannot contain direct or indirect
7864 * entity references to external entities.
7865 */
7866 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7867 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7868 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7869 "Attribute references external entity '%s'\n", name);
7870 }
7871 /*
7872 * [ WFC: No < in Attribute Values ]
7873 * The replacement text of any entity referred to directly or
7874 * indirectly in an attribute value (other than "&lt;") must
7875 * not contain a <.
7876 */
7877 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7878 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007879 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007880 (xmlStrchr(ent->content, '<'))) {
7881 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7882 "'<' in entity '%s' is not allowed in attributes values\n",
7883 name);
7884 }
7885
7886 /*
7887 * Internal check, no parameter entities here ...
7888 */
7889 else {
7890 switch (ent->etype) {
7891 case XML_INTERNAL_PARAMETER_ENTITY:
7892 case XML_EXTERNAL_PARAMETER_ENTITY:
7893 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7894 "Attempt to reference the parameter entity '%s'\n",
7895 name);
7896 break;
7897 default:
7898 break;
7899 }
7900 }
7901
7902 /*
7903 * [ WFC: No Recursion ]
7904 * A parsed entity must not contain a recursive reference
7905 * to itself, either directly or indirectly.
7906 * Done somewhere else
7907 */
7908
7909 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007910 *str = ptr;
7911 return(ent);
7912}
7913
7914/**
7915 * xmlParsePEReference:
7916 * @ctxt: an XML parser context
7917 *
7918 * parse PEReference declarations
7919 * The entity content is handled directly by pushing it's content as
7920 * a new input stream.
7921 *
7922 * [69] PEReference ::= '%' Name ';'
7923 *
7924 * [ WFC: No Recursion ]
7925 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007926 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007927 *
7928 * [ WFC: Entity Declared ]
7929 * In a document without any DTD, a document with only an internal DTD
7930 * subset which contains no parameter entity references, or a document
7931 * with "standalone='yes'", ... ... The declaration of a parameter
7932 * entity must precede any reference to it...
7933 *
7934 * [ VC: Entity Declared ]
7935 * In a document with an external subset or external parameter entities
7936 * with "standalone='no'", ... ... The declaration of a parameter entity
7937 * must precede any reference to it...
7938 *
7939 * [ WFC: In DTD ]
7940 * Parameter-entity references may only appear in the DTD.
7941 * NOTE: misleading but this is handled.
7942 */
7943void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007944xmlParsePEReference(xmlParserCtxtPtr ctxt)
7945{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007946 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007947 xmlEntityPtr entity = NULL;
7948 xmlParserInputPtr input;
7949
Daniel Veillard0161e632008-08-28 15:36:32 +00007950 if (RAW != '%')
7951 return;
7952 NEXT;
7953 name = xmlParseName(ctxt);
7954 if (name == NULL) {
7955 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7956 "xmlParsePEReference: no name\n");
7957 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007958 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007959 if (RAW != ';') {
7960 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7961 return;
7962 }
7963
7964 NEXT;
7965
7966 /*
7967 * Increate the number of entity references parsed
7968 */
7969 ctxt->nbentities++;
7970
7971 /*
7972 * Request the entity from SAX
7973 */
7974 if ((ctxt->sax != NULL) &&
7975 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007976 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7977 if (ctxt->instate == XML_PARSER_EOF)
7978 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007979 if (entity == NULL) {
7980 /*
7981 * [ WFC: Entity Declared ]
7982 * In a document without any DTD, a document with only an
7983 * internal DTD subset which contains no parameter entity
7984 * references, or a document with "standalone='yes'", ...
7985 * ... The declaration of a parameter entity must precede
7986 * any reference to it...
7987 */
7988 if ((ctxt->standalone == 1) ||
7989 ((ctxt->hasExternalSubset == 0) &&
7990 (ctxt->hasPErefs == 0))) {
7991 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7992 "PEReference: %%%s; not found\n",
7993 name);
7994 } else {
7995 /*
7996 * [ VC: Entity Declared ]
7997 * In a document with an external subset or external
7998 * parameter entities with "standalone='no'", ...
7999 * ... The declaration of a parameter entity must
8000 * precede any reference to it...
8001 */
8002 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8003 "PEReference: %%%s; not found\n",
8004 name, NULL);
8005 ctxt->valid = 0;
8006 }
8007 } else {
8008 /*
8009 * Internal checking in case the entity quest barfed
8010 */
8011 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8012 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8013 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8014 "Internal: %%%s; is not a parameter entity\n",
8015 name, NULL);
8016 } else if (ctxt->input->free != deallocblankswrapper) {
8017 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8018 if (xmlPushInput(ctxt, input) < 0)
8019 return;
8020 } else {
8021 /*
8022 * TODO !!!
8023 * handle the extra spaces added before and after
8024 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8025 */
8026 input = xmlNewEntityInputStream(ctxt, entity);
8027 if (xmlPushInput(ctxt, input) < 0)
8028 return;
8029 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8030 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8031 (IS_BLANK_CH(NXT(5)))) {
8032 xmlParseTextDecl(ctxt);
8033 if (ctxt->errNo ==
8034 XML_ERR_UNSUPPORTED_ENCODING) {
8035 /*
8036 * The XML REC instructs us to stop parsing
8037 * right here
8038 */
8039 ctxt->instate = XML_PARSER_EOF;
8040 return;
8041 }
8042 }
8043 }
8044 }
8045 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008046}
8047
8048/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008049 * xmlLoadEntityContent:
8050 * @ctxt: an XML parser context
8051 * @entity: an unloaded system entity
8052 *
8053 * Load the original content of the given system entity from the
8054 * ExternalID/SystemID given. This is to be used for Included in Literal
8055 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8056 *
8057 * Returns 0 in case of success and -1 in case of failure
8058 */
8059static int
8060xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8061 xmlParserInputPtr input;
8062 xmlBufferPtr buf;
8063 int l, c;
8064 int count = 0;
8065
8066 if ((ctxt == NULL) || (entity == NULL) ||
8067 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8068 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8069 (entity->content != NULL)) {
8070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8071 "xmlLoadEntityContent parameter error");
8072 return(-1);
8073 }
8074
8075 if (xmlParserDebugEntities)
8076 xmlGenericError(xmlGenericErrorContext,
8077 "Reading %s entity content input\n", entity->name);
8078
8079 buf = xmlBufferCreate();
8080 if (buf == NULL) {
8081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082 "xmlLoadEntityContent parameter error");
8083 return(-1);
8084 }
8085
8086 input = xmlNewEntityInputStream(ctxt, entity);
8087 if (input == NULL) {
8088 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8089 "xmlLoadEntityContent input error");
8090 xmlBufferFree(buf);
8091 return(-1);
8092 }
8093
8094 /*
8095 * Push the entity as the current input, read char by char
8096 * saving to the buffer until the end of the entity or an error
8097 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008098 if (xmlPushInput(ctxt, input) < 0) {
8099 xmlBufferFree(buf);
8100 return(-1);
8101 }
8102
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008103 GROW;
8104 c = CUR_CHAR(l);
8105 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8106 (IS_CHAR(c))) {
8107 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008108 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008109 count = 0;
8110 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008111 if (ctxt->instate == XML_PARSER_EOF) {
8112 xmlBufferFree(buf);
8113 return(-1);
8114 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008115 }
8116 NEXTL(l);
8117 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008118 if (c == 0) {
8119 count = 0;
8120 GROW;
8121 if (ctxt->instate == XML_PARSER_EOF) {
8122 xmlBufferFree(buf);
8123 return(-1);
8124 }
8125 c = CUR_CHAR(l);
8126 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008127 }
8128
8129 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8130 xmlPopInput(ctxt);
8131 } else if (!IS_CHAR(c)) {
8132 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8133 "xmlLoadEntityContent: invalid char value %d\n",
8134 c);
8135 xmlBufferFree(buf);
8136 return(-1);
8137 }
8138 entity->content = buf->content;
8139 buf->content = NULL;
8140 xmlBufferFree(buf);
8141
8142 return(0);
8143}
8144
8145/**
Owen Taylor3473f882001-02-23 17:55:21 +00008146 * xmlParseStringPEReference:
8147 * @ctxt: an XML parser context
8148 * @str: a pointer to an index in the string
8149 *
8150 * parse PEReference declarations
8151 *
8152 * [69] PEReference ::= '%' Name ';'
8153 *
8154 * [ WFC: No Recursion ]
8155 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008156 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008157 *
8158 * [ WFC: Entity Declared ]
8159 * In a document without any DTD, a document with only an internal DTD
8160 * subset which contains no parameter entity references, or a document
8161 * with "standalone='yes'", ... ... The declaration of a parameter
8162 * entity must precede any reference to it...
8163 *
8164 * [ VC: Entity Declared ]
8165 * In a document with an external subset or external parameter entities
8166 * with "standalone='no'", ... ... The declaration of a parameter entity
8167 * must precede any reference to it...
8168 *
8169 * [ WFC: In DTD ]
8170 * Parameter-entity references may only appear in the DTD.
8171 * NOTE: misleading but this is handled.
8172 *
8173 * Returns the string of the entity content.
8174 * str is updated to the current value of the index
8175 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008176static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008177xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8178 const xmlChar *ptr;
8179 xmlChar cur;
8180 xmlChar *name;
8181 xmlEntityPtr entity = NULL;
8182
8183 if ((str == NULL) || (*str == NULL)) return(NULL);
8184 ptr = *str;
8185 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008186 if (cur != '%')
8187 return(NULL);
8188 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008189 name = xmlParseStringName(ctxt, &ptr);
8190 if (name == NULL) {
8191 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8192 "xmlParseStringPEReference: no name\n");
8193 *str = ptr;
8194 return(NULL);
8195 }
8196 cur = *ptr;
8197 if (cur != ';') {
8198 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8199 xmlFree(name);
8200 *str = ptr;
8201 return(NULL);
8202 }
8203 ptr++;
8204
8205 /*
8206 * Increate the number of entity references parsed
8207 */
8208 ctxt->nbentities++;
8209
8210 /*
8211 * Request the entity from SAX
8212 */
8213 if ((ctxt->sax != NULL) &&
8214 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008215 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8216 if (ctxt->instate == XML_PARSER_EOF) {
8217 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008218 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008219 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008220 if (entity == NULL) {
8221 /*
8222 * [ WFC: Entity Declared ]
8223 * In a document without any DTD, a document with only an
8224 * internal DTD subset which contains no parameter entity
8225 * references, or a document with "standalone='yes'", ...
8226 * ... The declaration of a parameter entity must precede
8227 * any reference to it...
8228 */
8229 if ((ctxt->standalone == 1) ||
8230 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8231 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8232 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008233 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008234 /*
8235 * [ VC: Entity Declared ]
8236 * In a document with an external subset or external
8237 * parameter entities with "standalone='no'", ...
8238 * ... The declaration of a parameter entity must
8239 * precede any reference to it...
8240 */
8241 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8242 "PEReference: %%%s; not found\n",
8243 name, NULL);
8244 ctxt->valid = 0;
8245 }
8246 } else {
8247 /*
8248 * Internal checking in case the entity quest barfed
8249 */
8250 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8251 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8252 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8253 "%%%s; is not a parameter entity\n",
8254 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008255 }
8256 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008257 ctxt->hasPErefs = 1;
8258 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008259 *str = ptr;
8260 return(entity);
8261}
8262
8263/**
8264 * xmlParseDocTypeDecl:
8265 * @ctxt: an XML parser context
8266 *
8267 * parse a DOCTYPE declaration
8268 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008269 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008270 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8271 *
8272 * [ VC: Root Element Type ]
8273 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008274 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008275 */
8276
8277void
8278xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008279 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008280 xmlChar *ExternalID = NULL;
8281 xmlChar *URI = NULL;
8282
8283 /*
8284 * We know that '<!DOCTYPE' has been detected.
8285 */
8286 SKIP(9);
8287
8288 SKIP_BLANKS;
8289
8290 /*
8291 * Parse the DOCTYPE name.
8292 */
8293 name = xmlParseName(ctxt);
8294 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008295 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008297 }
8298 ctxt->intSubName = name;
8299
8300 SKIP_BLANKS;
8301
8302 /*
8303 * Check for SystemID and ExternalID
8304 */
8305 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8306
8307 if ((URI != NULL) || (ExternalID != NULL)) {
8308 ctxt->hasExternalSubset = 1;
8309 }
8310 ctxt->extSubURI = URI;
8311 ctxt->extSubSystem = ExternalID;
8312
8313 SKIP_BLANKS;
8314
8315 /*
8316 * Create and update the internal subset.
8317 */
8318 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8319 (!ctxt->disableSAX))
8320 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008321 if (ctxt->instate == XML_PARSER_EOF)
8322 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008323
8324 /*
8325 * Is there any internal subset declarations ?
8326 * they are handled separately in xmlParseInternalSubset()
8327 */
8328 if (RAW == '[')
8329 return;
8330
8331 /*
8332 * We should be at the end of the DOCTYPE declaration.
8333 */
8334 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008335 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008336 }
8337 NEXT;
8338}
8339
8340/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008341 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008342 * @ctxt: an XML parser context
8343 *
8344 * parse the internal subset declaration
8345 *
8346 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8347 */
8348
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008349static void
Owen Taylor3473f882001-02-23 17:55:21 +00008350xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8351 /*
8352 * Is there any DTD definition ?
8353 */
8354 if (RAW == '[') {
8355 ctxt->instate = XML_PARSER_DTD;
8356 NEXT;
8357 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008358 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008359 * PEReferences.
8360 * Subsequence (markupdecl | PEReference | S)*
8361 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008362 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008363 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008364 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008365
8366 SKIP_BLANKS;
8367 xmlParseMarkupDecl(ctxt);
8368 xmlParsePEReference(ctxt);
8369
8370 /*
8371 * Pop-up of finished entities.
8372 */
8373 while ((RAW == 0) && (ctxt->inputNr > 1))
8374 xmlPopInput(ctxt);
8375
8376 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008377 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008378 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008379 break;
8380 }
8381 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008382 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008383 NEXT;
8384 SKIP_BLANKS;
8385 }
8386 }
8387
8388 /*
8389 * We should be at the end of the DOCTYPE declaration.
8390 */
8391 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008392 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008393 }
8394 NEXT;
8395}
8396
Daniel Veillard81273902003-09-30 00:43:48 +00008397#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008398/**
8399 * xmlParseAttribute:
8400 * @ctxt: an XML parser context
8401 * @value: a xmlChar ** used to store the value of the attribute
8402 *
8403 * parse an attribute
8404 *
8405 * [41] Attribute ::= Name Eq AttValue
8406 *
8407 * [ WFC: No External Entity References ]
8408 * Attribute values cannot contain direct or indirect entity references
8409 * to external entities.
8410 *
8411 * [ WFC: No < in Attribute Values ]
8412 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008413 * an attribute value (other than "&lt;") must not contain a <.
8414 *
Owen Taylor3473f882001-02-23 17:55:21 +00008415 * [ VC: Attribute Value Type ]
8416 * The attribute must have been declared; the value must be of the type
8417 * declared for it.
8418 *
8419 * [25] Eq ::= S? '=' S?
8420 *
8421 * With namespace:
8422 *
8423 * [NS 11] Attribute ::= QName Eq AttValue
8424 *
8425 * Also the case QName == xmlns:??? is handled independently as a namespace
8426 * definition.
8427 *
8428 * Returns the attribute name, and the value in *value.
8429 */
8430
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008431const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008432xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008433 const xmlChar *name;
8434 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008435
8436 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008437 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008438 name = xmlParseName(ctxt);
8439 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008440 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008441 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008442 return(NULL);
8443 }
8444
8445 /*
8446 * read the value
8447 */
8448 SKIP_BLANKS;
8449 if (RAW == '=') {
8450 NEXT;
8451 SKIP_BLANKS;
8452 val = xmlParseAttValue(ctxt);
8453 ctxt->instate = XML_PARSER_CONTENT;
8454 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008455 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008456 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008457 return(NULL);
8458 }
8459
8460 /*
8461 * Check that xml:lang conforms to the specification
8462 * No more registered as an error, just generate a warning now
8463 * since this was deprecated in XML second edition
8464 */
8465 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8466 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008467 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8468 "Malformed value for xml:lang : %s\n",
8469 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008470 }
8471 }
8472
8473 /*
8474 * Check that xml:space conforms to the specification
8475 */
8476 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8477 if (xmlStrEqual(val, BAD_CAST "default"))
8478 *(ctxt->space) = 0;
8479 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8480 *(ctxt->space) = 1;
8481 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008482 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008483"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008484 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008485 }
8486 }
8487
8488 *value = val;
8489 return(name);
8490}
8491
8492/**
8493 * xmlParseStartTag:
8494 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008495 *
Owen Taylor3473f882001-02-23 17:55:21 +00008496 * parse a start of tag either for rule element or
8497 * EmptyElement. In both case we don't parse the tag closing chars.
8498 *
8499 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8500 *
8501 * [ WFC: Unique Att Spec ]
8502 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008503 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008504 *
8505 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8506 *
8507 * [ WFC: Unique Att Spec ]
8508 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008509 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008510 *
8511 * With namespace:
8512 *
8513 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8514 *
8515 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8516 *
8517 * Returns the element name parsed
8518 */
8519
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008520const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008521xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008522 const xmlChar *name;
8523 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008524 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008525 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008526 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008527 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008528 int i;
8529
8530 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008531 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008532
8533 name = xmlParseName(ctxt);
8534 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008535 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008536 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008537 return(NULL);
8538 }
8539
8540 /*
8541 * Now parse the attributes, it ends up with the ending
8542 *
8543 * (S Attribute)* S?
8544 */
8545 SKIP_BLANKS;
8546 GROW;
8547
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008548 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008549 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008550 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008551 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008552 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008553
8554 attname = xmlParseAttribute(ctxt, &attvalue);
8555 if ((attname != NULL) && (attvalue != NULL)) {
8556 /*
8557 * [ WFC: Unique Att Spec ]
8558 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008559 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008560 */
8561 for (i = 0; i < nbatts;i += 2) {
8562 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008563 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008564 xmlFree(attvalue);
8565 goto failed;
8566 }
8567 }
Owen Taylor3473f882001-02-23 17:55:21 +00008568 /*
8569 * Add the pair to atts
8570 */
8571 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008572 maxatts = 22; /* allow for 10 attrs by default */
8573 atts = (const xmlChar **)
8574 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008575 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008576 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008577 if (attvalue != NULL)
8578 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008579 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008580 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008581 ctxt->atts = atts;
8582 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008583 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008584 const xmlChar **n;
8585
Owen Taylor3473f882001-02-23 17:55:21 +00008586 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008587 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008588 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008589 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008590 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008591 if (attvalue != NULL)
8592 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008593 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008594 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008595 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008596 ctxt->atts = atts;
8597 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008598 }
8599 atts[nbatts++] = attname;
8600 atts[nbatts++] = attvalue;
8601 atts[nbatts] = NULL;
8602 atts[nbatts + 1] = NULL;
8603 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008604 if (attvalue != NULL)
8605 xmlFree(attvalue);
8606 }
8607
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008608failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008609
Daniel Veillard3772de32002-12-17 10:31:45 +00008610 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008611 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8612 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008613 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8615 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008616 }
8617 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008618 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8619 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008620 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8621 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008622 break;
8623 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008624 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008625 GROW;
8626 }
8627
8628 /*
8629 * SAX: Start of Element !
8630 */
8631 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008632 (!ctxt->disableSAX)) {
8633 if (nbatts > 0)
8634 ctxt->sax->startElement(ctxt->userData, name, atts);
8635 else
8636 ctxt->sax->startElement(ctxt->userData, name, NULL);
8637 }
Owen Taylor3473f882001-02-23 17:55:21 +00008638
8639 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008640 /* Free only the content strings */
8641 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008642 if (atts[i] != NULL)
8643 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008644 }
8645 return(name);
8646}
8647
8648/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008650 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008651 * @line: line of the start tag
8652 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008653 *
8654 * parse an end of tag
8655 *
8656 * [42] ETag ::= '</' Name S? '>'
8657 *
8658 * With namespace
8659 *
8660 * [NS 9] ETag ::= '</' QName S? '>'
8661 */
8662
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008663static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008664xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008665 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008666
8667 GROW;
8668 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008669 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008670 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008671 return;
8672 }
8673 SKIP(2);
8674
Daniel Veillard46de64e2002-05-29 08:21:33 +00008675 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008676
8677 /*
8678 * We should definitely be at the ending "S? '>'" part
8679 */
8680 GROW;
8681 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008682 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008683 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008684 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008685 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008686
8687 /*
8688 * [ WFC: Element Type Match ]
8689 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008690 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008691 *
8692 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008693 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008694 if (name == NULL) name = BAD_CAST "unparseable";
8695 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008696 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008697 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008698 }
8699
8700 /*
8701 * SAX: End of Tag
8702 */
8703 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8704 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008705 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008706
Daniel Veillarde57ec792003-09-10 10:50:59 +00008707 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008708 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008709 return;
8710}
8711
8712/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008713 * xmlParseEndTag:
8714 * @ctxt: an XML parser context
8715 *
8716 * parse an end of tag
8717 *
8718 * [42] ETag ::= '</' Name S? '>'
8719 *
8720 * With namespace
8721 *
8722 * [NS 9] ETag ::= '</' QName S? '>'
8723 */
8724
8725void
8726xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008727 xmlParseEndTag1(ctxt, 0);
8728}
Daniel Veillard81273902003-09-30 00:43:48 +00008729#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730
8731/************************************************************************
8732 * *
8733 * SAX 2 specific operations *
8734 * *
8735 ************************************************************************/
8736
Daniel Veillard0fb18932003-09-07 09:14:37 +00008737/*
8738 * xmlGetNamespace:
8739 * @ctxt: an XML parser context
8740 * @prefix: the prefix to lookup
8741 *
8742 * Lookup the namespace name for the @prefix (which ca be NULL)
8743 * The prefix must come from the @ctxt->dict dictionnary
8744 *
8745 * Returns the namespace name or NULL if not bound
8746 */
8747static const xmlChar *
8748xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8749 int i;
8750
Daniel Veillarde57ec792003-09-10 10:50:59 +00008751 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008752 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008753 if (ctxt->nsTab[i] == prefix) {
8754 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8755 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008757 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008758 return(NULL);
8759}
8760
8761/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008762 * xmlParseQName:
8763 * @ctxt: an XML parser context
8764 * @prefix: pointer to store the prefix part
8765 *
8766 * parse an XML Namespace QName
8767 *
8768 * [6] QName ::= (Prefix ':')? LocalPart
8769 * [7] Prefix ::= NCName
8770 * [8] LocalPart ::= NCName
8771 *
8772 * Returns the Name parsed or NULL
8773 */
8774
8775static const xmlChar *
8776xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8777 const xmlChar *l, *p;
8778
8779 GROW;
8780
8781 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008782 if (l == NULL) {
8783 if (CUR == ':') {
8784 l = xmlParseName(ctxt);
8785 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008786 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008787 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008788 *prefix = NULL;
8789 return(l);
8790 }
8791 }
8792 return(NULL);
8793 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008794 if (CUR == ':') {
8795 NEXT;
8796 p = l;
8797 l = xmlParseNCName(ctxt);
8798 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008799 xmlChar *tmp;
8800
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008801 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8802 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008803 l = xmlParseNmtoken(ctxt);
8804 if (l == NULL)
8805 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8806 else {
8807 tmp = xmlBuildQName(l, p, NULL, 0);
8808 xmlFree((char *)l);
8809 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008810 p = xmlDictLookup(ctxt->dict, tmp, -1);
8811 if (tmp != NULL) xmlFree(tmp);
8812 *prefix = NULL;
8813 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008814 }
8815 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008816 xmlChar *tmp;
8817
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008818 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8819 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008820 NEXT;
8821 tmp = (xmlChar *) xmlParseName(ctxt);
8822 if (tmp != NULL) {
8823 tmp = xmlBuildQName(tmp, l, NULL, 0);
8824 l = xmlDictLookup(ctxt->dict, tmp, -1);
8825 if (tmp != NULL) xmlFree(tmp);
8826 *prefix = p;
8827 return(l);
8828 }
8829 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8830 l = xmlDictLookup(ctxt->dict, tmp, -1);
8831 if (tmp != NULL) xmlFree(tmp);
8832 *prefix = p;
8833 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 }
8835 *prefix = p;
8836 } else
8837 *prefix = NULL;
8838 return(l);
8839}
8840
8841/**
8842 * xmlParseQNameAndCompare:
8843 * @ctxt: an XML parser context
8844 * @name: the localname
8845 * @prefix: the prefix, if any.
8846 *
8847 * parse an XML name and compares for match
8848 * (specialized for endtag parsing)
8849 *
8850 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8851 * and the name for mismatch
8852 */
8853
8854static const xmlChar *
8855xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8856 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008857 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008858 const xmlChar *in;
8859 const xmlChar *ret;
8860 const xmlChar *prefix2;
8861
8862 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8863
8864 GROW;
8865 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008866
Daniel Veillard0fb18932003-09-07 09:14:37 +00008867 cmp = prefix;
8868 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008869 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008870 ++cmp;
8871 }
8872 if ((*cmp == 0) && (*in == ':')) {
8873 in++;
8874 cmp = name;
8875 while (*in != 0 && *in == *cmp) {
8876 ++in;
8877 ++cmp;
8878 }
William M. Brack76e95df2003-10-18 16:20:14 +00008879 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008880 /* success */
8881 ctxt->input->cur = in;
8882 return((const xmlChar*) 1);
8883 }
8884 }
8885 /*
8886 * all strings coms from the dictionary, equality can be done directly
8887 */
8888 ret = xmlParseQName (ctxt, &prefix2);
8889 if ((ret == name) && (prefix == prefix2))
8890 return((const xmlChar*) 1);
8891 return ret;
8892}
8893
8894/**
8895 * xmlParseAttValueInternal:
8896 * @ctxt: an XML parser context
8897 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008898 * @alloc: whether the attribute was reallocated as a new string
8899 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008900 *
8901 * parse a value for an attribute.
8902 * NOTE: if no normalization is needed, the routine will return pointers
8903 * directly from the data buffer.
8904 *
8905 * 3.3.3 Attribute-Value Normalization:
8906 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008907 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008908 * - a character reference is processed by appending the referenced
8909 * character to the attribute value
8910 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008911 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008912 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8913 * appending #x20 to the normalized value, except that only a single
8914 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008915 * parsed entity or the literal entity value of an internal parsed entity
8916 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008917 * If the declared value is not CDATA, then the XML processor must further
8918 * process the normalized attribute value by discarding any leading and
8919 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008920 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008921 * All attributes for which no declaration has been read should be treated
8922 * by a non-validating parser as if declared CDATA.
8923 *
8924 * Returns the AttValue parsed or NULL. The value has to be freed by the
8925 * caller if it was copied, this can be detected by val[*len] == 0.
8926 */
8927
8928static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008929xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8930 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008931{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008932 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008933 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008935 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008936
8937 GROW;
8938 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008939 line = ctxt->input->line;
8940 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008941 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008942 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008943 return (NULL);
8944 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008945 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008946
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008947 /*
8948 * try to handle in this routine the most common case where no
8949 * allocation of a new string is required and where content is
8950 * pure ASCII.
8951 */
8952 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008953 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008954 end = ctxt->input->end;
8955 start = in;
8956 if (in >= end) {
8957 const xmlChar *oldbase = ctxt->input->base;
8958 GROW;
8959 if (oldbase != ctxt->input->base) {
8960 long delta = ctxt->input->base - oldbase;
8961 start = start + delta;
8962 in = in + delta;
8963 }
8964 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008965 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008966 if (normalize) {
8967 /*
8968 * Skip any leading spaces
8969 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008970 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008971 ((*in == 0x20) || (*in == 0x9) ||
8972 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008973 if (*in == 0xA) {
8974 line++; col = 1;
8975 } else {
8976 col++;
8977 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008978 in++;
8979 start = in;
8980 if (in >= end) {
8981 const xmlChar *oldbase = ctxt->input->base;
8982 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008983 if (ctxt->instate == XML_PARSER_EOF)
8984 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008985 if (oldbase != ctxt->input->base) {
8986 long delta = ctxt->input->base - oldbase;
8987 start = start + delta;
8988 in = in + delta;
8989 }
8990 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008991 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8992 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8993 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008994 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008995 return(NULL);
8996 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008997 }
8998 }
8999 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9000 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009001 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009002 if ((*in++ == 0x20) && (*in == 0x20)) break;
9003 if (in >= end) {
9004 const xmlChar *oldbase = ctxt->input->base;
9005 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009006 if (ctxt->instate == XML_PARSER_EOF)
9007 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009008 if (oldbase != ctxt->input->base) {
9009 long delta = ctxt->input->base - oldbase;
9010 start = start + delta;
9011 in = in + delta;
9012 }
9013 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009014 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9015 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9016 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009017 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009018 return(NULL);
9019 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009020 }
9021 }
9022 last = in;
9023 /*
9024 * skip the trailing blanks
9025 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009026 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009027 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009028 ((*in == 0x20) || (*in == 0x9) ||
9029 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009030 if (*in == 0xA) {
9031 line++, col = 1;
9032 } else {
9033 col++;
9034 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009035 in++;
9036 if (in >= end) {
9037 const xmlChar *oldbase = ctxt->input->base;
9038 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009039 if (ctxt->instate == XML_PARSER_EOF)
9040 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009041 if (oldbase != ctxt->input->base) {
9042 long delta = ctxt->input->base - oldbase;
9043 start = start + delta;
9044 in = in + delta;
9045 last = last + delta;
9046 }
9047 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009048 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9049 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9050 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009051 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009052 return(NULL);
9053 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009054 }
9055 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009056 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9057 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9058 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009059 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009060 return(NULL);
9061 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009062 if (*in != limit) goto need_complex;
9063 } else {
9064 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9065 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9066 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009067 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009068 if (in >= end) {
9069 const xmlChar *oldbase = ctxt->input->base;
9070 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009071 if (ctxt->instate == XML_PARSER_EOF)
9072 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009073 if (oldbase != ctxt->input->base) {
9074 long delta = ctxt->input->base - oldbase;
9075 start = start + delta;
9076 in = in + delta;
9077 }
9078 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009079 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9080 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9081 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009082 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009083 return(NULL);
9084 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009085 }
9086 }
9087 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009088 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9089 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9090 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009091 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009092 return(NULL);
9093 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009094 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009096 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009097 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009098 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009099 *len = last - start;
9100 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009101 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009102 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009103 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009104 }
9105 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009106 ctxt->input->line = line;
9107 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009108 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009109 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009110need_complex:
9111 if (alloc) *alloc = 1;
9112 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009113}
9114
9115/**
9116 * xmlParseAttribute2:
9117 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009118 * @pref: the element prefix
9119 * @elem: the element name
9120 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009121 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009122 * @len: an int * to save the length of the attribute
9123 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124 *
9125 * parse an attribute in the new SAX2 framework.
9126 *
9127 * Returns the attribute name, and the value in *value, .
9128 */
9129
9130static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009131xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009132 const xmlChar * pref, const xmlChar * elem,
9133 const xmlChar ** prefix, xmlChar ** value,
9134 int *len, int *alloc)
9135{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009136 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009137 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009138 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009139
9140 *value = NULL;
9141 GROW;
9142 name = xmlParseQName(ctxt, prefix);
9143 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009144 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9145 "error parsing attribute name\n");
9146 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009147 }
9148
9149 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009150 * get the type if needed
9151 */
9152 if (ctxt->attsSpecial != NULL) {
9153 int type;
9154
9155 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009156 pref, elem, *prefix, name);
9157 if (type != 0)
9158 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009159 }
9160
9161 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009162 * read the value
9163 */
9164 SKIP_BLANKS;
9165 if (RAW == '=') {
9166 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009167 SKIP_BLANKS;
9168 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9169 if (normalize) {
9170 /*
9171 * Sometimes a second normalisation pass for spaces is needed
9172 * but that only happens if charrefs or entities refernces
9173 * have been used in the attribute value, i.e. the attribute
9174 * value have been extracted in an allocated string already.
9175 */
9176 if (*alloc) {
9177 const xmlChar *val2;
9178
9179 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009180 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009181 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009182 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009183 }
9184 }
9185 }
9186 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009187 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009188 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9189 "Specification mandate value for attribute %s\n",
9190 name);
9191 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009192 }
9193
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009194 if (*prefix == ctxt->str_xml) {
9195 /*
9196 * Check that xml:lang conforms to the specification
9197 * No more registered as an error, just generate a warning now
9198 * since this was deprecated in XML second edition
9199 */
9200 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9201 internal_val = xmlStrndup(val, *len);
9202 if (!xmlCheckLanguageID(internal_val)) {
9203 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9204 "Malformed value for xml:lang : %s\n",
9205 internal_val, NULL);
9206 }
9207 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009208
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009209 /*
9210 * Check that xml:space conforms to the specification
9211 */
9212 if (xmlStrEqual(name, BAD_CAST "space")) {
9213 internal_val = xmlStrndup(val, *len);
9214 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9215 *(ctxt->space) = 0;
9216 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9217 *(ctxt->space) = 1;
9218 else {
9219 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9220 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9221 internal_val, NULL);
9222 }
9223 }
9224 if (internal_val) {
9225 xmlFree(internal_val);
9226 }
9227 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009228
9229 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009230 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009231}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009232/**
9233 * xmlParseStartTag2:
9234 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009235 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009236 * parse a start of tag either for rule element or
9237 * EmptyElement. In both case we don't parse the tag closing chars.
9238 * This routine is called when running SAX2 parsing
9239 *
9240 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9241 *
9242 * [ WFC: Unique Att Spec ]
9243 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009244 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009245 *
9246 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9247 *
9248 * [ WFC: Unique Att Spec ]
9249 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009250 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009251 *
9252 * With namespace:
9253 *
9254 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9255 *
9256 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9257 *
9258 * Returns the element name parsed
9259 */
9260
9261static const xmlChar *
9262xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009263 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009264 const xmlChar *localname;
9265 const xmlChar *prefix;
9266 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009267 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009268 const xmlChar *nsname;
9269 xmlChar *attvalue;
9270 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009271 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009272 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009273 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009274 const xmlChar *base;
9275 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009276 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009277
9278 if (RAW != '<') return(NULL);
9279 NEXT1;
9280
9281 /*
9282 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9283 * point since the attribute values may be stored as pointers to
9284 * the buffer and calling SHRINK would destroy them !
9285 * The Shrinking is only possible once the full set of attribute
9286 * callbacks have been done.
9287 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009288reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009289 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009290 base = ctxt->input->base;
9291 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009292 oldline = ctxt->input->line;
9293 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009294 nbatts = 0;
9295 nratts = 0;
9296 nbdef = 0;
9297 nbNs = 0;
9298 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009299 /* Forget any namespaces added during an earlier parse of this element. */
9300 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009301
9302 localname = xmlParseQName(ctxt, &prefix);
9303 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009304 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9305 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009306 return(NULL);
9307 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009308 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009309
9310 /*
9311 * Now parse the attributes, it ends up with the ending
9312 *
9313 * (S Attribute)* S?
9314 */
9315 SKIP_BLANKS;
9316 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009317 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009318
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009319 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009320 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009321 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009322 const xmlChar *q = CUR_PTR;
9323 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009324 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009325
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009326 attname = xmlParseAttribute2(ctxt, prefix, localname,
9327 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009328 if (ctxt->input->base != base) {
9329 if ((attvalue != NULL) && (alloc != 0))
9330 xmlFree(attvalue);
9331 attvalue = NULL;
9332 goto base_changed;
9333 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009334 if ((attname != NULL) && (attvalue != NULL)) {
9335 if (len < 0) len = xmlStrlen(attvalue);
9336 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009337 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9338 xmlURIPtr uri;
9339
Daniel Veillardc836ba62014-07-14 16:39:50 +08009340 if (URL == NULL) {
9341 xmlErrMemory(ctxt, "dictionary allocation failure");
9342 if ((attvalue != NULL) && (alloc != 0))
9343 xmlFree(attvalue);
9344 return(NULL);
9345 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009346 if (*URL != 0) {
9347 uri = xmlParseURI((const char *) URL);
9348 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009349 xmlNsErr(ctxt, XML_WAR_NS_URI,
9350 "xmlns: '%s' is not a valid URI\n",
9351 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009352 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009353 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009354 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9355 "xmlns: URI %s is not absolute\n",
9356 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009357 }
9358 xmlFreeURI(uri);
9359 }
Daniel Veillard37334572008-07-31 08:20:02 +00009360 if (URL == ctxt->str_xml_ns) {
9361 if (attname != ctxt->str_xml) {
9362 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9363 "xml namespace URI cannot be the default namespace\n",
9364 NULL, NULL, NULL);
9365 }
9366 goto skip_default_ns;
9367 }
9368 if ((len == 29) &&
9369 (xmlStrEqual(URL,
9370 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9371 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9372 "reuse of the xmlns namespace name is forbidden\n",
9373 NULL, NULL, NULL);
9374 goto skip_default_ns;
9375 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009376 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009377 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009378 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009379 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009380 for (j = 1;j <= nbNs;j++)
9381 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9382 break;
9383 if (j <= nbNs)
9384 xmlErrAttributeDup(ctxt, NULL, attname);
9385 else
9386 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009387skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009388 if (alloc != 0) xmlFree(attvalue);
9389 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009390 continue;
9391 }
9392 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009393 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9394 xmlURIPtr uri;
9395
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009396 if (attname == ctxt->str_xml) {
9397 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009398 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399 "xml namespace prefix mapped to wrong URI\n",
9400 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009401 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009402 /*
9403 * Do not keep a namespace definition node
9404 */
Daniel Veillard37334572008-07-31 08:20:02 +00009405 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009406 }
Daniel Veillard37334572008-07-31 08:20:02 +00009407 if (URL == ctxt->str_xml_ns) {
9408 if (attname != ctxt->str_xml) {
9409 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9410 "xml namespace URI mapped to wrong prefix\n",
9411 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009412 }
Daniel Veillard37334572008-07-31 08:20:02 +00009413 goto skip_ns;
9414 }
9415 if (attname == ctxt->str_xmlns) {
9416 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9417 "redefinition of the xmlns prefix is forbidden\n",
9418 NULL, NULL, NULL);
9419 goto skip_ns;
9420 }
9421 if ((len == 29) &&
9422 (xmlStrEqual(URL,
9423 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9424 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9425 "reuse of the xmlns namespace name is forbidden\n",
9426 NULL, NULL, NULL);
9427 goto skip_ns;
9428 }
9429 if ((URL == NULL) || (URL[0] == 0)) {
9430 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9431 "xmlns:%s: Empty XML namespace is not allowed\n",
9432 attname, NULL, NULL);
9433 goto skip_ns;
9434 } else {
9435 uri = xmlParseURI((const char *) URL);
9436 if (uri == NULL) {
9437 xmlNsErr(ctxt, XML_WAR_NS_URI,
9438 "xmlns:%s: '%s' is not a valid URI\n",
9439 attname, URL, NULL);
9440 } else {
9441 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9442 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9443 "xmlns:%s: URI %s is not absolute\n",
9444 attname, URL, NULL);
9445 }
9446 xmlFreeURI(uri);
9447 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009448 }
9449
Daniel Veillard0fb18932003-09-07 09:14:37 +00009450 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009451 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009452 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009453 for (j = 1;j <= nbNs;j++)
9454 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9455 break;
9456 if (j <= nbNs)
9457 xmlErrAttributeDup(ctxt, aprefix, attname);
9458 else
9459 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009460skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009461 if (alloc != 0) xmlFree(attvalue);
9462 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009463 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009464 continue;
9465 }
9466
9467 /*
9468 * Add the pair to atts
9469 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009470 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9471 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009472 if (attvalue[len] == 0)
9473 xmlFree(attvalue);
9474 goto failed;
9475 }
9476 maxatts = ctxt->maxatts;
9477 atts = ctxt->atts;
9478 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009479 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009480 atts[nbatts++] = attname;
9481 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009482 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009483 atts[nbatts++] = attvalue;
9484 attvalue += len;
9485 atts[nbatts++] = attvalue;
9486 /*
9487 * tag if some deallocation is needed
9488 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009489 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009490 } else {
9491 if ((attvalue != NULL) && (attvalue[len] == 0))
9492 xmlFree(attvalue);
9493 }
9494
Daniel Veillard37334572008-07-31 08:20:02 +00009495failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009496
9497 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009498 if (ctxt->instate == XML_PARSER_EOF)
9499 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009500 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009501 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9502 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009503 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009504 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9505 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009506 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009507 }
9508 SKIP_BLANKS;
9509 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9510 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009511 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009512 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009513 break;
9514 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009515 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009516 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009517 }
9518
Daniel Veillard0fb18932003-09-07 09:14:37 +00009519 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009520 * The attributes defaulting
9521 */
9522 if (ctxt->attsDefault != NULL) {
9523 xmlDefAttrsPtr defaults;
9524
9525 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9526 if (defaults != NULL) {
9527 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009528 attname = defaults->values[5 * i];
9529 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009530
9531 /*
9532 * special work for namespaces defaulted defs
9533 */
9534 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9535 /*
9536 * check that it's not a defined namespace
9537 */
9538 for (j = 1;j <= nbNs;j++)
9539 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9540 break;
9541 if (j <= nbNs) continue;
9542
9543 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009544 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009545 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009546 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009547 nbNs++;
9548 }
9549 } else if (aprefix == ctxt->str_xmlns) {
9550 /*
9551 * check that it's not a defined namespace
9552 */
9553 for (j = 1;j <= nbNs;j++)
9554 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9555 break;
9556 if (j <= nbNs) continue;
9557
9558 nsname = xmlGetNamespace(ctxt, attname);
9559 if (nsname != defaults->values[2]) {
9560 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009561 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009562 nbNs++;
9563 }
9564 } else {
9565 /*
9566 * check that it's not a defined attribute
9567 */
9568 for (j = 0;j < nbatts;j+=5) {
9569 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9570 break;
9571 }
9572 if (j < nbatts) continue;
9573
9574 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9575 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009576 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009577 }
9578 maxatts = ctxt->maxatts;
9579 atts = ctxt->atts;
9580 }
9581 atts[nbatts++] = attname;
9582 atts[nbatts++] = aprefix;
9583 if (aprefix == NULL)
9584 atts[nbatts++] = NULL;
9585 else
9586 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009587 atts[nbatts++] = defaults->values[5 * i + 2];
9588 atts[nbatts++] = defaults->values[5 * i + 3];
9589 if ((ctxt->standalone == 1) &&
9590 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009591 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009592 "standalone: attribute %s on %s defaulted from external subset\n",
9593 attname, localname);
9594 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009595 nbdef++;
9596 }
9597 }
9598 }
9599 }
9600
Daniel Veillarde70c8772003-11-25 07:21:18 +00009601 /*
9602 * The attributes checkings
9603 */
9604 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009605 /*
9606 * The default namespace does not apply to attribute names.
9607 */
9608 if (atts[i + 1] != NULL) {
9609 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9610 if (nsname == NULL) {
9611 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9612 "Namespace prefix %s for %s on %s is not defined\n",
9613 atts[i + 1], atts[i], localname);
9614 }
9615 atts[i + 2] = nsname;
9616 } else
9617 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009618 /*
9619 * [ WFC: Unique Att Spec ]
9620 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009621 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009622 * As extended by the Namespace in XML REC.
9623 */
9624 for (j = 0; j < i;j += 5) {
9625 if (atts[i] == atts[j]) {
9626 if (atts[i+1] == atts[j+1]) {
9627 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9628 break;
9629 }
9630 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9631 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9632 "Namespaced Attribute %s in '%s' redefined\n",
9633 atts[i], nsname, NULL);
9634 break;
9635 }
9636 }
9637 }
9638 }
9639
Daniel Veillarde57ec792003-09-10 10:50:59 +00009640 nsname = xmlGetNamespace(ctxt, prefix);
9641 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009642 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9643 "Namespace prefix %s on %s is not defined\n",
9644 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009645 }
9646 *pref = prefix;
9647 *URI = nsname;
9648
9649 /*
9650 * SAX: Start of Element !
9651 */
9652 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9653 (!ctxt->disableSAX)) {
9654 if (nbNs > 0)
9655 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9656 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9657 nbatts / 5, nbdef, atts);
9658 else
9659 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9660 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9661 }
9662
9663 /*
9664 * Free up attribute allocated strings if needed
9665 */
9666 if (attval != 0) {
9667 for (i = 3,j = 0; j < nratts;i += 5,j++)
9668 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9669 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009670 }
9671
9672 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009673
9674base_changed:
9675 /*
9676 * the attribute strings are valid iif the base didn't changed
9677 */
9678 if (attval != 0) {
9679 for (i = 3,j = 0; j < nratts;i += 5,j++)
9680 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9681 xmlFree((xmlChar *) atts[i]);
9682 }
9683 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009684 ctxt->input->line = oldline;
9685 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009686 if (ctxt->wellFormed == 1) {
9687 goto reparse;
9688 }
9689 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009690}
9691
9692/**
9693 * xmlParseEndTag2:
9694 * @ctxt: an XML parser context
9695 * @line: line of the start tag
9696 * @nsNr: number of namespaces on the start tag
9697 *
9698 * parse an end of tag
9699 *
9700 * [42] ETag ::= '</' Name S? '>'
9701 *
9702 * With namespace
9703 *
9704 * [NS 9] ETag ::= '</' QName S? '>'
9705 */
9706
9707static void
9708xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009709 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009710 const xmlChar *name;
9711
9712 GROW;
9713 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009714 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009715 return;
9716 }
9717 SKIP(2);
9718
William M. Brack13dfa872004-09-18 04:52:08 +00009719 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009720 if (ctxt->input->cur[tlen] == '>') {
9721 ctxt->input->cur += tlen + 1;
9722 goto done;
9723 }
9724 ctxt->input->cur += tlen;
9725 name = (xmlChar*)1;
9726 } else {
9727 if (prefix == NULL)
9728 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9729 else
9730 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9731 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009732
9733 /*
9734 * We should definitely be at the ending "S? '>'" part
9735 */
9736 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009737 if (ctxt->instate == XML_PARSER_EOF)
9738 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009739 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009740 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009741 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009742 } else
9743 NEXT1;
9744
9745 /*
9746 * [ WFC: Element Type Match ]
9747 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009748 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009749 *
9750 */
9751 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009752 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009753 if ((line == 0) && (ctxt->node != NULL))
9754 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009755 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009756 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009757 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009758 }
9759
9760 /*
9761 * SAX: End of Tag
9762 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009763done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009764 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9765 (!ctxt->disableSAX))
9766 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9767
Daniel Veillard0fb18932003-09-07 09:14:37 +00009768 spacePop(ctxt);
9769 if (nsNr != 0)
9770 nsPop(ctxt, nsNr);
9771 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009772}
9773
9774/**
Owen Taylor3473f882001-02-23 17:55:21 +00009775 * xmlParseCDSect:
9776 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009777 *
Owen Taylor3473f882001-02-23 17:55:21 +00009778 * Parse escaped pure raw content.
9779 *
9780 * [18] CDSect ::= CDStart CData CDEnd
9781 *
9782 * [19] CDStart ::= '<![CDATA['
9783 *
9784 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9785 *
9786 * [21] CDEnd ::= ']]>'
9787 */
9788void
9789xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9790 xmlChar *buf = NULL;
9791 int len = 0;
9792 int size = XML_PARSER_BUFFER_SIZE;
9793 int r, rl;
9794 int s, sl;
9795 int cur, l;
9796 int count = 0;
9797
Daniel Veillard8f597c32003-10-06 08:19:27 +00009798 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009799 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009800 SKIP(9);
9801 } else
9802 return;
9803
9804 ctxt->instate = XML_PARSER_CDATA_SECTION;
9805 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009806 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009807 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009808 ctxt->instate = XML_PARSER_CONTENT;
9809 return;
9810 }
9811 NEXTL(rl);
9812 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009813 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009814 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009815 ctxt->instate = XML_PARSER_CONTENT;
9816 return;
9817 }
9818 NEXTL(sl);
9819 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009820 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009821 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009822 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009823 return;
9824 }
William M. Brack871611b2003-10-18 04:53:14 +00009825 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009826 ((r != ']') || (s != ']') || (cur != '>'))) {
9827 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009828 xmlChar *tmp;
9829
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009830 if ((size > XML_MAX_TEXT_LENGTH) &&
9831 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9832 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9833 "CData section too big found", NULL);
9834 xmlFree (buf);
9835 return;
9836 }
9837 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009838 if (tmp == NULL) {
9839 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009840 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009841 return;
9842 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009843 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009844 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009845 }
9846 COPY_BUF(rl,buf,len,r);
9847 r = s;
9848 rl = sl;
9849 s = cur;
9850 sl = l;
9851 count++;
9852 if (count > 50) {
9853 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009854 if (ctxt->instate == XML_PARSER_EOF) {
9855 xmlFree(buf);
9856 return;
9857 }
Owen Taylor3473f882001-02-23 17:55:21 +00009858 count = 0;
9859 }
9860 NEXTL(l);
9861 cur = CUR_CHAR(l);
9862 }
9863 buf[len] = 0;
9864 ctxt->instate = XML_PARSER_CONTENT;
9865 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009866 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009867 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009868 xmlFree(buf);
9869 return;
9870 }
9871 NEXTL(l);
9872
9873 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009874 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009875 */
9876 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9877 if (ctxt->sax->cdataBlock != NULL)
9878 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009879 else if (ctxt->sax->characters != NULL)
9880 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009881 }
9882 xmlFree(buf);
9883}
9884
9885/**
9886 * xmlParseContent:
9887 * @ctxt: an XML parser context
9888 *
9889 * Parse a content:
9890 *
9891 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9892 */
9893
9894void
9895xmlParseContent(xmlParserCtxtPtr ctxt) {
9896 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009897 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009898 ((RAW != '<') || (NXT(1) != '/')) &&
9899 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009900 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009901 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009902 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009903
9904 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009905 * First case : a Processing Instruction.
9906 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009907 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009908 xmlParsePI(ctxt);
9909 }
9910
9911 /*
9912 * Second case : a CDSection
9913 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009914 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009915 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009916 xmlParseCDSect(ctxt);
9917 }
9918
9919 /*
9920 * Third case : a comment
9921 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009922 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009923 (NXT(2) == '-') && (NXT(3) == '-')) {
9924 xmlParseComment(ctxt);
9925 ctxt->instate = XML_PARSER_CONTENT;
9926 }
9927
9928 /*
9929 * Fourth case : a sub-element.
9930 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009931 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009932 xmlParseElement(ctxt);
9933 }
9934
9935 /*
9936 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009937 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009938 */
9939
Daniel Veillard21a0f912001-02-25 19:54:14 +00009940 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009941 xmlParseReference(ctxt);
9942 }
9943
9944 /*
9945 * Last case, text. Note that References are handled directly.
9946 */
9947 else {
9948 xmlParseCharData(ctxt, 0);
9949 }
9950
9951 GROW;
9952 /*
9953 * Pop-up of finished entities.
9954 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009955 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009956 xmlPopInput(ctxt);
9957 SHRINK;
9958
Daniel Veillardfdc91562002-07-01 21:52:03 +00009959 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009960 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9961 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009962 ctxt->instate = XML_PARSER_EOF;
9963 break;
9964 }
9965 }
9966}
9967
9968/**
9969 * xmlParseElement:
9970 * @ctxt: an XML parser context
9971 *
9972 * parse an XML element, this is highly recursive
9973 *
9974 * [39] element ::= EmptyElemTag | STag content ETag
9975 *
9976 * [ WFC: Element Type Match ]
9977 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009978 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009979 *
Owen Taylor3473f882001-02-23 17:55:21 +00009980 */
9981
9982void
9983xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009984 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009985 const xmlChar *prefix = NULL;
9986 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009987 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009988 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009989 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009990 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009991
Daniel Veillard8915c152008-08-26 13:05:34 +00009992 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9993 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9994 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9995 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9996 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009997 ctxt->instate = XML_PARSER_EOF;
9998 return;
9999 }
10000
Owen Taylor3473f882001-02-23 17:55:21 +000010001 /* Capture start position */
10002 if (ctxt->record_info) {
10003 node_info.begin_pos = ctxt->input->consumed +
10004 (CUR_PTR - ctxt->input->base);
10005 node_info.begin_line = ctxt->input->line;
10006 }
10007
10008 if (ctxt->spaceNr == 0)
10009 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010010 else if (*ctxt->space == -2)
10011 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010012 else
10013 spacePush(ctxt, *ctxt->space);
10014
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010015 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010016#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010017 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010018#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010019 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010020#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010021 else
10022 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010023#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010024 if (ctxt->instate == XML_PARSER_EOF)
10025 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010026 if (name == NULL) {
10027 spacePop(ctxt);
10028 return;
10029 }
10030 namePush(ctxt, name);
10031 ret = ctxt->node;
10032
Daniel Veillard4432df22003-09-28 18:58:27 +000010033#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010034 /*
10035 * [ VC: Root Element Type ]
10036 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010037 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010038 */
10039 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10040 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10041 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010042#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010043
10044 /*
10045 * Check for an Empty Element.
10046 */
10047 if ((RAW == '/') && (NXT(1) == '>')) {
10048 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010049 if (ctxt->sax2) {
10050 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10051 (!ctxt->disableSAX))
10052 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010053#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010054 } else {
10055 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10056 (!ctxt->disableSAX))
10057 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010058#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010059 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010060 namePop(ctxt);
10061 spacePop(ctxt);
10062 if (nsNr != ctxt->nsNr)
10063 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010064 if ( ret != NULL && ctxt->record_info ) {
10065 node_info.end_pos = ctxt->input->consumed +
10066 (CUR_PTR - ctxt->input->base);
10067 node_info.end_line = ctxt->input->line;
10068 node_info.node = ret;
10069 xmlParserAddNodeInfo(ctxt, &node_info);
10070 }
10071 return;
10072 }
10073 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010074 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010075 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010076 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10077 "Couldn't find end of Start Tag %s line %d\n",
10078 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010079
10080 /*
10081 * end of parsing of this node.
10082 */
10083 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010084 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010085 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010086 if (nsNr != ctxt->nsNr)
10087 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010088
10089 /*
10090 * Capture end position and add node
10091 */
10092 if ( ret != NULL && ctxt->record_info ) {
10093 node_info.end_pos = ctxt->input->consumed +
10094 (CUR_PTR - ctxt->input->base);
10095 node_info.end_line = ctxt->input->line;
10096 node_info.node = ret;
10097 xmlParserAddNodeInfo(ctxt, &node_info);
10098 }
10099 return;
10100 }
10101
10102 /*
10103 * Parse the content of the element:
10104 */
10105 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010106 if (ctxt->instate == XML_PARSER_EOF)
10107 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010108 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010109 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010110 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010111 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010112
10113 /*
10114 * end of parsing of this node.
10115 */
10116 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010117 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010118 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010119 if (nsNr != ctxt->nsNr)
10120 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010121 return;
10122 }
10123
10124 /*
10125 * parse the end of tag: '</' should be here.
10126 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010127 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010128 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010129 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010130 }
10131#ifdef LIBXML_SAX1_ENABLED
10132 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010133 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010134#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010135
10136 /*
10137 * Capture end position and add node
10138 */
10139 if ( ret != NULL && ctxt->record_info ) {
10140 node_info.end_pos = ctxt->input->consumed +
10141 (CUR_PTR - ctxt->input->base);
10142 node_info.end_line = ctxt->input->line;
10143 node_info.node = ret;
10144 xmlParserAddNodeInfo(ctxt, &node_info);
10145 }
10146}
10147
10148/**
10149 * xmlParseVersionNum:
10150 * @ctxt: an XML parser context
10151 *
10152 * parse the XML version value.
10153 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010154 * [26] VersionNum ::= '1.' [0-9]+
10155 *
10156 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010157 *
10158 * Returns the string giving the XML version number, or NULL
10159 */
10160xmlChar *
10161xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10162 xmlChar *buf = NULL;
10163 int len = 0;
10164 int size = 10;
10165 xmlChar cur;
10166
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010167 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010168 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010169 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010170 return(NULL);
10171 }
10172 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010173 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010174 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010175 return(NULL);
10176 }
10177 buf[len++] = cur;
10178 NEXT;
10179 cur=CUR;
10180 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010181 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010182 return(NULL);
10183 }
10184 buf[len++] = cur;
10185 NEXT;
10186 cur=CUR;
10187 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010188 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010189 xmlChar *tmp;
10190
Owen Taylor3473f882001-02-23 17:55:21 +000010191 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010192 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10193 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010194 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010195 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010196 return(NULL);
10197 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010198 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010199 }
10200 buf[len++] = cur;
10201 NEXT;
10202 cur=CUR;
10203 }
10204 buf[len] = 0;
10205 return(buf);
10206}
10207
10208/**
10209 * xmlParseVersionInfo:
10210 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010211 *
Owen Taylor3473f882001-02-23 17:55:21 +000010212 * parse the XML version.
10213 *
10214 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010215 *
Owen Taylor3473f882001-02-23 17:55:21 +000010216 * [25] Eq ::= S? '=' S?
10217 *
10218 * Returns the version string, e.g. "1.0"
10219 */
10220
10221xmlChar *
10222xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10223 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010224
Daniel Veillarda07050d2003-10-19 14:46:32 +000010225 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010226 SKIP(7);
10227 SKIP_BLANKS;
10228 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010229 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010230 return(NULL);
10231 }
10232 NEXT;
10233 SKIP_BLANKS;
10234 if (RAW == '"') {
10235 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010236 version = xmlParseVersionNum(ctxt);
10237 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010238 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010239 } else
10240 NEXT;
10241 } else if (RAW == '\''){
10242 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010243 version = xmlParseVersionNum(ctxt);
10244 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010245 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010246 } else
10247 NEXT;
10248 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010249 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010250 }
10251 }
10252 return(version);
10253}
10254
10255/**
10256 * xmlParseEncName:
10257 * @ctxt: an XML parser context
10258 *
10259 * parse the XML encoding name
10260 *
10261 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10262 *
10263 * Returns the encoding name value or NULL
10264 */
10265xmlChar *
10266xmlParseEncName(xmlParserCtxtPtr ctxt) {
10267 xmlChar *buf = NULL;
10268 int len = 0;
10269 int size = 10;
10270 xmlChar cur;
10271
10272 cur = CUR;
10273 if (((cur >= 'a') && (cur <= 'z')) ||
10274 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010275 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010276 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010277 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010278 return(NULL);
10279 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010280
Owen Taylor3473f882001-02-23 17:55:21 +000010281 buf[len++] = cur;
10282 NEXT;
10283 cur = CUR;
10284 while (((cur >= 'a') && (cur <= 'z')) ||
10285 ((cur >= 'A') && (cur <= 'Z')) ||
10286 ((cur >= '0') && (cur <= '9')) ||
10287 (cur == '.') || (cur == '_') ||
10288 (cur == '-')) {
10289 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010290 xmlChar *tmp;
10291
Owen Taylor3473f882001-02-23 17:55:21 +000010292 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010293 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10294 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010295 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010296 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010297 return(NULL);
10298 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010299 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010300 }
10301 buf[len++] = cur;
10302 NEXT;
10303 cur = CUR;
10304 if (cur == 0) {
10305 SHRINK;
10306 GROW;
10307 cur = CUR;
10308 }
10309 }
10310 buf[len] = 0;
10311 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010312 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010313 }
10314 return(buf);
10315}
10316
10317/**
10318 * xmlParseEncodingDecl:
10319 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010320 *
Owen Taylor3473f882001-02-23 17:55:21 +000010321 * parse the XML encoding declaration
10322 *
10323 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10324 *
10325 * this setups the conversion filters.
10326 *
10327 * Returns the encoding value or NULL
10328 */
10329
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010330const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010331xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10332 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010333
10334 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010335 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010336 SKIP(8);
10337 SKIP_BLANKS;
10338 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010339 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010340 return(NULL);
10341 }
10342 NEXT;
10343 SKIP_BLANKS;
10344 if (RAW == '"') {
10345 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010346 encoding = xmlParseEncName(ctxt);
10347 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010348 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010349 } else
10350 NEXT;
10351 } else if (RAW == '\''){
10352 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010353 encoding = xmlParseEncName(ctxt);
10354 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010355 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010356 } else
10357 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010358 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010359 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010360 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010361
10362 /*
10363 * Non standard parsing, allowing the user to ignore encoding
10364 */
10365 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10366 return(encoding);
10367
Daniel Veillard6b621b82003-08-11 15:03:34 +000010368 /*
10369 * UTF-16 encoding stwich has already taken place at this stage,
10370 * more over the little-endian/big-endian selection is already done
10371 */
10372 if ((encoding != NULL) &&
10373 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10374 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010375 /*
10376 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010377 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010378 * document is apparently UTF-8 compatible, then raise an
10379 * encoding mismatch fatal error
10380 */
10381 if ((ctxt->encoding == NULL) &&
10382 (ctxt->input->buf != NULL) &&
10383 (ctxt->input->buf->encoder == NULL)) {
10384 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10385 "Document labelled UTF-16 but has UTF-8 content\n");
10386 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010387 if (ctxt->encoding != NULL)
10388 xmlFree((xmlChar *) ctxt->encoding);
10389 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010390 }
10391 /*
10392 * UTF-8 encoding is handled natively
10393 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010394 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010395 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10396 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010397 if (ctxt->encoding != NULL)
10398 xmlFree((xmlChar *) ctxt->encoding);
10399 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010400 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010401 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010402 xmlCharEncodingHandlerPtr handler;
10403
10404 if (ctxt->input->encoding != NULL)
10405 xmlFree((xmlChar *) ctxt->input->encoding);
10406 ctxt->input->encoding = encoding;
10407
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010408 handler = xmlFindCharEncodingHandler((const char *) encoding);
10409 if (handler != NULL) {
10410 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010411 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010412 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010413 "Unsupported encoding %s\n", encoding);
10414 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010415 }
10416 }
10417 }
10418 return(encoding);
10419}
10420
10421/**
10422 * xmlParseSDDecl:
10423 * @ctxt: an XML parser context
10424 *
10425 * parse the XML standalone declaration
10426 *
10427 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010428 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010429 *
10430 * [ VC: Standalone Document Declaration ]
10431 * TODO The standalone document declaration must have the value "no"
10432 * if any external markup declarations contain declarations of:
10433 * - attributes with default values, if elements to which these
10434 * attributes apply appear in the document without specifications
10435 * of values for these attributes, or
10436 * - entities (other than amp, lt, gt, apos, quot), if references
10437 * to those entities appear in the document, or
10438 * - attributes with values subject to normalization, where the
10439 * attribute appears in the document with a value which will change
10440 * as a result of normalization, or
10441 * - element types with element content, if white space occurs directly
10442 * within any instance of those types.
10443 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010444 * Returns:
10445 * 1 if standalone="yes"
10446 * 0 if standalone="no"
10447 * -2 if standalone attribute is missing or invalid
10448 * (A standalone value of -2 means that the XML declaration was found,
10449 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010450 */
10451
10452int
10453xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010454 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010455
10456 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010457 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010458 SKIP(10);
10459 SKIP_BLANKS;
10460 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010461 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010462 return(standalone);
10463 }
10464 NEXT;
10465 SKIP_BLANKS;
10466 if (RAW == '\''){
10467 NEXT;
10468 if ((RAW == 'n') && (NXT(1) == 'o')) {
10469 standalone = 0;
10470 SKIP(2);
10471 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10472 (NXT(2) == 's')) {
10473 standalone = 1;
10474 SKIP(3);
10475 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010476 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010477 }
10478 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010479 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010480 } else
10481 NEXT;
10482 } else if (RAW == '"'){
10483 NEXT;
10484 if ((RAW == 'n') && (NXT(1) == 'o')) {
10485 standalone = 0;
10486 SKIP(2);
10487 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10488 (NXT(2) == 's')) {
10489 standalone = 1;
10490 SKIP(3);
10491 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010492 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010493 }
10494 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010495 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010496 } else
10497 NEXT;
10498 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010499 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010500 }
10501 }
10502 return(standalone);
10503}
10504
10505/**
10506 * xmlParseXMLDecl:
10507 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010508 *
Owen Taylor3473f882001-02-23 17:55:21 +000010509 * parse an XML declaration header
10510 *
10511 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10512 */
10513
10514void
10515xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10516 xmlChar *version;
10517
10518 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010519 * This value for standalone indicates that the document has an
10520 * XML declaration but it does not have a standalone attribute.
10521 * It will be overwritten later if a standalone attribute is found.
10522 */
10523 ctxt->input->standalone = -2;
10524
10525 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010526 * We know that '<?xml' is here.
10527 */
10528 SKIP(5);
10529
William M. Brack76e95df2003-10-18 16:20:14 +000010530 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010531 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10532 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010533 }
10534 SKIP_BLANKS;
10535
10536 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010537 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010538 */
10539 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010540 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010541 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010542 } else {
10543 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10544 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010545 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010546 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010547 if (ctxt->options & XML_PARSE_OLD10) {
10548 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10549 "Unsupported version '%s'\n",
10550 version);
10551 } else {
10552 if ((version[0] == '1') && ((version[1] == '.'))) {
10553 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10554 "Unsupported version '%s'\n",
10555 version, NULL);
10556 } else {
10557 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10558 "Unsupported version '%s'\n",
10559 version);
10560 }
10561 }
Daniel Veillard19840942001-11-29 16:11:38 +000010562 }
10563 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010564 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010565 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010566 }
Owen Taylor3473f882001-02-23 17:55:21 +000010567
10568 /*
10569 * We may have the encoding declaration
10570 */
William M. Brack76e95df2003-10-18 16:20:14 +000010571 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010572 if ((RAW == '?') && (NXT(1) == '>')) {
10573 SKIP(2);
10574 return;
10575 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010576 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010577 }
10578 xmlParseEncodingDecl(ctxt);
10579 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10580 /*
10581 * The XML REC instructs us to stop parsing right here
10582 */
10583 return;
10584 }
10585
10586 /*
10587 * We may have the standalone status.
10588 */
William M. Brack76e95df2003-10-18 16:20:14 +000010589 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010590 if ((RAW == '?') && (NXT(1) == '>')) {
10591 SKIP(2);
10592 return;
10593 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010594 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010595 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010596
10597 /*
10598 * We can grow the input buffer freely at that point
10599 */
10600 GROW;
10601
Owen Taylor3473f882001-02-23 17:55:21 +000010602 SKIP_BLANKS;
10603 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10604
10605 SKIP_BLANKS;
10606 if ((RAW == '?') && (NXT(1) == '>')) {
10607 SKIP(2);
10608 } else if (RAW == '>') {
10609 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010610 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010611 NEXT;
10612 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010613 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010614 MOVETO_ENDTAG(CUR_PTR);
10615 NEXT;
10616 }
10617}
10618
10619/**
10620 * xmlParseMisc:
10621 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010622 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010623 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010624 *
10625 * [27] Misc ::= Comment | PI | S
10626 */
10627
10628void
10629xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010630 while ((ctxt->instate != XML_PARSER_EOF) &&
10631 (((RAW == '<') && (NXT(1) == '?')) ||
10632 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10633 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010634 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010635 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010636 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010637 NEXT;
10638 } else
10639 xmlParseComment(ctxt);
10640 }
10641}
10642
10643/**
10644 * xmlParseDocument:
10645 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010646 *
Owen Taylor3473f882001-02-23 17:55:21 +000010647 * parse an XML document (and build a tree if using the standard SAX
10648 * interface).
10649 *
10650 * [1] document ::= prolog element Misc*
10651 *
10652 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10653 *
10654 * Returns 0, -1 in case of error. the parser context is augmented
10655 * as a result of the parsing.
10656 */
10657
10658int
10659xmlParseDocument(xmlParserCtxtPtr ctxt) {
10660 xmlChar start[4];
10661 xmlCharEncoding enc;
10662
10663 xmlInitParser();
10664
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010665 if ((ctxt == NULL) || (ctxt->input == NULL))
10666 return(-1);
10667
Owen Taylor3473f882001-02-23 17:55:21 +000010668 GROW;
10669
10670 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010671 * SAX: detecting the level.
10672 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010673 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010674
10675 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010676 * SAX: beginning of the document processing.
10677 */
10678 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10679 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010680 if (ctxt->instate == XML_PARSER_EOF)
10681 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010682
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010683 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010684 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010685 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010686 * Get the 4 first bytes and decode the charset
10687 * if enc != XML_CHAR_ENCODING_NONE
10688 * plug some encoding conversion routines.
10689 */
10690 start[0] = RAW;
10691 start[1] = NXT(1);
10692 start[2] = NXT(2);
10693 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010694 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010695 if (enc != XML_CHAR_ENCODING_NONE) {
10696 xmlSwitchEncoding(ctxt, enc);
10697 }
Owen Taylor3473f882001-02-23 17:55:21 +000010698 }
10699
10700
10701 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010702 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010703 }
10704
10705 /*
10706 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010707 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010708 * than just the first line, unless the amount of data is really
10709 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010710 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010711 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10712 GROW;
10713 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010714 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010715
10716 /*
10717 * Note that we will switch encoding on the fly.
10718 */
10719 xmlParseXMLDecl(ctxt);
10720 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10721 /*
10722 * The XML REC instructs us to stop parsing right here
10723 */
10724 return(-1);
10725 }
10726 ctxt->standalone = ctxt->input->standalone;
10727 SKIP_BLANKS;
10728 } else {
10729 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10730 }
10731 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10732 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010733 if (ctxt->instate == XML_PARSER_EOF)
10734 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010735 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10736 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10737 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10738 }
Owen Taylor3473f882001-02-23 17:55:21 +000010739
10740 /*
10741 * The Misc part of the Prolog
10742 */
10743 GROW;
10744 xmlParseMisc(ctxt);
10745
10746 /*
10747 * Then possibly doc type declaration(s) and more Misc
10748 * (doctypedecl Misc*)?
10749 */
10750 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010751 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010752
10753 ctxt->inSubset = 1;
10754 xmlParseDocTypeDecl(ctxt);
10755 if (RAW == '[') {
10756 ctxt->instate = XML_PARSER_DTD;
10757 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010758 if (ctxt->instate == XML_PARSER_EOF)
10759 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010760 }
10761
10762 /*
10763 * Create and update the external subset.
10764 */
10765 ctxt->inSubset = 2;
10766 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10767 (!ctxt->disableSAX))
10768 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10769 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010770 if (ctxt->instate == XML_PARSER_EOF)
10771 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010772 ctxt->inSubset = 0;
10773
Daniel Veillardac4118d2008-01-11 05:27:32 +000010774 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010775
10776 ctxt->instate = XML_PARSER_PROLOG;
10777 xmlParseMisc(ctxt);
10778 }
10779
10780 /*
10781 * Time to start parsing the tree itself
10782 */
10783 GROW;
10784 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010785 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10786 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010787 } else {
10788 ctxt->instate = XML_PARSER_CONTENT;
10789 xmlParseElement(ctxt);
10790 ctxt->instate = XML_PARSER_EPILOG;
10791
10792
10793 /*
10794 * The Misc part at the end
10795 */
10796 xmlParseMisc(ctxt);
10797
Daniel Veillard561b7f82002-03-20 21:55:57 +000010798 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010799 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010800 }
10801 ctxt->instate = XML_PARSER_EOF;
10802 }
10803
10804 /*
10805 * SAX: end of the document processing.
10806 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010807 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010808 ctxt->sax->endDocument(ctxt->userData);
10809
Daniel Veillard5997aca2002-03-18 18:36:20 +000010810 /*
10811 * Remove locally kept entity definitions if the tree was not built
10812 */
10813 if ((ctxt->myDoc != NULL) &&
10814 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10815 xmlFreeDoc(ctxt->myDoc);
10816 ctxt->myDoc = NULL;
10817 }
10818
Daniel Veillardae0765b2008-07-31 19:54:59 +000010819 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10820 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10821 if (ctxt->valid)
10822 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10823 if (ctxt->nsWellFormed)
10824 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10825 if (ctxt->options & XML_PARSE_OLD10)
10826 ctxt->myDoc->properties |= XML_DOC_OLD10;
10827 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010828 if (! ctxt->wellFormed) {
10829 ctxt->valid = 0;
10830 return(-1);
10831 }
Owen Taylor3473f882001-02-23 17:55:21 +000010832 return(0);
10833}
10834
10835/**
10836 * xmlParseExtParsedEnt:
10837 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010838 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010839 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010840 * An external general parsed entity is well-formed if it matches the
10841 * production labeled extParsedEnt.
10842 *
10843 * [78] extParsedEnt ::= TextDecl? content
10844 *
10845 * Returns 0, -1 in case of error. the parser context is augmented
10846 * as a result of the parsing.
10847 */
10848
10849int
10850xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10851 xmlChar start[4];
10852 xmlCharEncoding enc;
10853
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010854 if ((ctxt == NULL) || (ctxt->input == NULL))
10855 return(-1);
10856
Owen Taylor3473f882001-02-23 17:55:21 +000010857 xmlDefaultSAXHandlerInit();
10858
Daniel Veillard309f81d2003-09-23 09:02:53 +000010859 xmlDetectSAX2(ctxt);
10860
Owen Taylor3473f882001-02-23 17:55:21 +000010861 GROW;
10862
10863 /*
10864 * SAX: beginning of the document processing.
10865 */
10866 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10867 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10868
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010869 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010870 * Get the 4 first bytes and decode the charset
10871 * if enc != XML_CHAR_ENCODING_NONE
10872 * plug some encoding conversion routines.
10873 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010874 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10875 start[0] = RAW;
10876 start[1] = NXT(1);
10877 start[2] = NXT(2);
10878 start[3] = NXT(3);
10879 enc = xmlDetectCharEncoding(start, 4);
10880 if (enc != XML_CHAR_ENCODING_NONE) {
10881 xmlSwitchEncoding(ctxt, enc);
10882 }
Owen Taylor3473f882001-02-23 17:55:21 +000010883 }
10884
10885
10886 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010887 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010888 }
10889
10890 /*
10891 * Check for the XMLDecl in the Prolog.
10892 */
10893 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010894 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010895
10896 /*
10897 * Note that we will switch encoding on the fly.
10898 */
10899 xmlParseXMLDecl(ctxt);
10900 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10901 /*
10902 * The XML REC instructs us to stop parsing right here
10903 */
10904 return(-1);
10905 }
10906 SKIP_BLANKS;
10907 } else {
10908 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10909 }
10910 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10911 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010912 if (ctxt->instate == XML_PARSER_EOF)
10913 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010914
10915 /*
10916 * Doing validity checking on chunk doesn't make sense
10917 */
10918 ctxt->instate = XML_PARSER_CONTENT;
10919 ctxt->validate = 0;
10920 ctxt->loadsubset = 0;
10921 ctxt->depth = 0;
10922
10923 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010924 if (ctxt->instate == XML_PARSER_EOF)
10925 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010926
Owen Taylor3473f882001-02-23 17:55:21 +000010927 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010928 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010929 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010930 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010931 }
10932
10933 /*
10934 * SAX: end of the document processing.
10935 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010936 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010937 ctxt->sax->endDocument(ctxt->userData);
10938
10939 if (! ctxt->wellFormed) return(-1);
10940 return(0);
10941}
10942
Daniel Veillard73b013f2003-09-30 12:36:01 +000010943#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010944/************************************************************************
10945 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010946 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010947 * *
10948 ************************************************************************/
10949
10950/**
10951 * xmlParseLookupSequence:
10952 * @ctxt: an XML parser context
10953 * @first: the first char to lookup
10954 * @next: the next char to lookup or zero
10955 * @third: the next char to lookup or zero
10956 *
10957 * Try to find if a sequence (first, next, third) or just (first next) or
10958 * (first) is available in the input stream.
10959 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10960 * to avoid rescanning sequences of bytes, it DOES change the state of the
10961 * parser, do not use liberally.
10962 *
10963 * Returns the index to the current parsing point if the full sequence
10964 * is available, -1 otherwise.
10965 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010966static int
Owen Taylor3473f882001-02-23 17:55:21 +000010967xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10968 xmlChar next, xmlChar third) {
10969 int base, len;
10970 xmlParserInputPtr in;
10971 const xmlChar *buf;
10972
10973 in = ctxt->input;
10974 if (in == NULL) return(-1);
10975 base = in->cur - in->base;
10976 if (base < 0) return(-1);
10977 if (ctxt->checkIndex > base)
10978 base = ctxt->checkIndex;
10979 if (in->buf == NULL) {
10980 buf = in->base;
10981 len = in->length;
10982 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010983 buf = xmlBufContent(in->buf->buffer);
10984 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010985 }
10986 /* take into account the sequence length */
10987 if (third) len -= 2;
10988 else if (next) len --;
10989 for (;base < len;base++) {
10990 if (buf[base] == first) {
10991 if (third != 0) {
10992 if ((buf[base + 1] != next) ||
10993 (buf[base + 2] != third)) continue;
10994 } else if (next != 0) {
10995 if (buf[base + 1] != next) continue;
10996 }
10997 ctxt->checkIndex = 0;
10998#ifdef DEBUG_PUSH
10999 if (next == 0)
11000 xmlGenericError(xmlGenericErrorContext,
11001 "PP: lookup '%c' found at %d\n",
11002 first, base);
11003 else if (third == 0)
11004 xmlGenericError(xmlGenericErrorContext,
11005 "PP: lookup '%c%c' found at %d\n",
11006 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011007 else
Owen Taylor3473f882001-02-23 17:55:21 +000011008 xmlGenericError(xmlGenericErrorContext,
11009 "PP: lookup '%c%c%c' found at %d\n",
11010 first, next, third, base);
11011#endif
11012 return(base - (in->cur - in->base));
11013 }
11014 }
11015 ctxt->checkIndex = base;
11016#ifdef DEBUG_PUSH
11017 if (next == 0)
11018 xmlGenericError(xmlGenericErrorContext,
11019 "PP: lookup '%c' failed\n", first);
11020 else if (third == 0)
11021 xmlGenericError(xmlGenericErrorContext,
11022 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011023 else
Owen Taylor3473f882001-02-23 17:55:21 +000011024 xmlGenericError(xmlGenericErrorContext,
11025 "PP: lookup '%c%c%c' failed\n", first, next, third);
11026#endif
11027 return(-1);
11028}
11029
11030/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011031 * xmlParseGetLasts:
11032 * @ctxt: an XML parser context
11033 * @lastlt: pointer to store the last '<' from the input
11034 * @lastgt: pointer to store the last '>' from the input
11035 *
11036 * Lookup the last < and > in the current chunk
11037 */
11038static void
11039xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11040 const xmlChar **lastgt) {
11041 const xmlChar *tmp;
11042
11043 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11044 xmlGenericError(xmlGenericErrorContext,
11045 "Internal error: xmlParseGetLasts\n");
11046 return;
11047 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011048 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011049 tmp = ctxt->input->end;
11050 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011051 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011052 if (tmp < ctxt->input->base) {
11053 *lastlt = NULL;
11054 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011055 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011056 *lastlt = tmp;
11057 tmp++;
11058 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11059 if (*tmp == '\'') {
11060 tmp++;
11061 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11062 if (tmp < ctxt->input->end) tmp++;
11063 } else if (*tmp == '"') {
11064 tmp++;
11065 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11066 if (tmp < ctxt->input->end) tmp++;
11067 } else
11068 tmp++;
11069 }
11070 if (tmp < ctxt->input->end)
11071 *lastgt = tmp;
11072 else {
11073 tmp = *lastlt;
11074 tmp--;
11075 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11076 if (tmp >= ctxt->input->base)
11077 *lastgt = tmp;
11078 else
11079 *lastgt = NULL;
11080 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011081 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011082 } else {
11083 *lastlt = NULL;
11084 *lastgt = NULL;
11085 }
11086}
11087/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011088 * xmlCheckCdataPush:
11089 * @cur: pointer to the bock of characters
11090 * @len: length of the block in bytes
11091 *
11092 * Check that the block of characters is okay as SCdata content [20]
11093 *
11094 * Returns the number of bytes to pass if okay, a negative index where an
11095 * UTF-8 error occured otherwise
11096 */
11097static int
11098xmlCheckCdataPush(const xmlChar *utf, int len) {
11099 int ix;
11100 unsigned char c;
11101 int codepoint;
11102
11103 if ((utf == NULL) || (len <= 0))
11104 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011105
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011106 for (ix = 0; ix < len;) { /* string is 0-terminated */
11107 c = utf[ix];
11108 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11109 if (c >= 0x20)
11110 ix++;
11111 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11112 ix++;
11113 else
11114 return(-ix);
11115 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11116 if (ix + 2 > len) return(ix);
11117 if ((utf[ix+1] & 0xc0 ) != 0x80)
11118 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011119 codepoint = (utf[ix] & 0x1f) << 6;
11120 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011121 if (!xmlIsCharQ(codepoint))
11122 return(-ix);
11123 ix += 2;
11124 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11125 if (ix + 3 > len) return(ix);
11126 if (((utf[ix+1] & 0xc0) != 0x80) ||
11127 ((utf[ix+2] & 0xc0) != 0x80))
11128 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011129 codepoint = (utf[ix] & 0xf) << 12;
11130 codepoint |= (utf[ix+1] & 0x3f) << 6;
11131 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011132 if (!xmlIsCharQ(codepoint))
11133 return(-ix);
11134 ix += 3;
11135 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11136 if (ix + 4 > len) return(ix);
11137 if (((utf[ix+1] & 0xc0) != 0x80) ||
11138 ((utf[ix+2] & 0xc0) != 0x80) ||
11139 ((utf[ix+3] & 0xc0) != 0x80))
11140 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011141 codepoint = (utf[ix] & 0x7) << 18;
11142 codepoint |= (utf[ix+1] & 0x3f) << 12;
11143 codepoint |= (utf[ix+2] & 0x3f) << 6;
11144 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011145 if (!xmlIsCharQ(codepoint))
11146 return(-ix);
11147 ix += 4;
11148 } else /* unknown encoding */
11149 return(-ix);
11150 }
11151 return(ix);
11152}
11153
11154/**
Owen Taylor3473f882001-02-23 17:55:21 +000011155 * xmlParseTryOrFinish:
11156 * @ctxt: an XML parser context
11157 * @terminate: last chunk indicator
11158 *
11159 * Try to progress on parsing
11160 *
11161 * Returns zero if no parsing was possible
11162 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011163static int
Owen Taylor3473f882001-02-23 17:55:21 +000011164xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11165 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011166 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011167 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011168 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011169
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011170 if (ctxt->input == NULL)
11171 return(0);
11172
Owen Taylor3473f882001-02-23 17:55:21 +000011173#ifdef DEBUG_PUSH
11174 switch (ctxt->instate) {
11175 case XML_PARSER_EOF:
11176 xmlGenericError(xmlGenericErrorContext,
11177 "PP: try EOF\n"); break;
11178 case XML_PARSER_START:
11179 xmlGenericError(xmlGenericErrorContext,
11180 "PP: try START\n"); break;
11181 case XML_PARSER_MISC:
11182 xmlGenericError(xmlGenericErrorContext,
11183 "PP: try MISC\n");break;
11184 case XML_PARSER_COMMENT:
11185 xmlGenericError(xmlGenericErrorContext,
11186 "PP: try COMMENT\n");break;
11187 case XML_PARSER_PROLOG:
11188 xmlGenericError(xmlGenericErrorContext,
11189 "PP: try PROLOG\n");break;
11190 case XML_PARSER_START_TAG:
11191 xmlGenericError(xmlGenericErrorContext,
11192 "PP: try START_TAG\n");break;
11193 case XML_PARSER_CONTENT:
11194 xmlGenericError(xmlGenericErrorContext,
11195 "PP: try CONTENT\n");break;
11196 case XML_PARSER_CDATA_SECTION:
11197 xmlGenericError(xmlGenericErrorContext,
11198 "PP: try CDATA_SECTION\n");break;
11199 case XML_PARSER_END_TAG:
11200 xmlGenericError(xmlGenericErrorContext,
11201 "PP: try END_TAG\n");break;
11202 case XML_PARSER_ENTITY_DECL:
11203 xmlGenericError(xmlGenericErrorContext,
11204 "PP: try ENTITY_DECL\n");break;
11205 case XML_PARSER_ENTITY_VALUE:
11206 xmlGenericError(xmlGenericErrorContext,
11207 "PP: try ENTITY_VALUE\n");break;
11208 case XML_PARSER_ATTRIBUTE_VALUE:
11209 xmlGenericError(xmlGenericErrorContext,
11210 "PP: try ATTRIBUTE_VALUE\n");break;
11211 case XML_PARSER_DTD:
11212 xmlGenericError(xmlGenericErrorContext,
11213 "PP: try DTD\n");break;
11214 case XML_PARSER_EPILOG:
11215 xmlGenericError(xmlGenericErrorContext,
11216 "PP: try EPILOG\n");break;
11217 case XML_PARSER_PI:
11218 xmlGenericError(xmlGenericErrorContext,
11219 "PP: try PI\n");break;
11220 case XML_PARSER_IGNORE:
11221 xmlGenericError(xmlGenericErrorContext,
11222 "PP: try IGNORE\n");break;
11223 }
11224#endif
11225
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011226 if ((ctxt->input != NULL) &&
11227 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011228 xmlSHRINK(ctxt);
11229 ctxt->checkIndex = 0;
11230 }
11231 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011232
Daniel Veillarde50ba812013-04-11 15:54:51 +080011233 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011234 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011235 return(0);
11236
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011237
Owen Taylor3473f882001-02-23 17:55:21 +000011238 /*
11239 * Pop-up of finished entities.
11240 */
11241 while ((RAW == 0) && (ctxt->inputNr > 1))
11242 xmlPopInput(ctxt);
11243
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011244 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011245 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011246 avail = ctxt->input->length -
11247 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011248 else {
11249 /*
11250 * If we are operating on converted input, try to flush
11251 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011252 * buffer. But do not do this in document start where
11253 * encoding="..." may not have been read and we work on a
11254 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011255 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011256 if ((ctxt->instate != XML_PARSER_START) &&
11257 (ctxt->input->buf->raw != NULL) &&
11258 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011259 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11260 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011261 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011262
11263 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011264 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11265 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011266 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011267 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011268 (ctxt->input->cur - ctxt->input->base);
11269 }
Owen Taylor3473f882001-02-23 17:55:21 +000011270 if (avail < 1)
11271 goto done;
11272 switch (ctxt->instate) {
11273 case XML_PARSER_EOF:
11274 /*
11275 * Document parsing is done !
11276 */
11277 goto done;
11278 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011279 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11280 xmlChar start[4];
11281 xmlCharEncoding enc;
11282
11283 /*
11284 * Very first chars read from the document flow.
11285 */
11286 if (avail < 4)
11287 goto done;
11288
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011289 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011290 * Get the 4 first bytes and decode the charset
11291 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011292 * plug some encoding conversion routines,
11293 * else xmlSwitchEncoding will set to (default)
11294 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011295 */
11296 start[0] = RAW;
11297 start[1] = NXT(1);
11298 start[2] = NXT(2);
11299 start[3] = NXT(3);
11300 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011301 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011302 break;
11303 }
Owen Taylor3473f882001-02-23 17:55:21 +000011304
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011305 if (avail < 2)
11306 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011307 cur = ctxt->input->cur[0];
11308 next = ctxt->input->cur[1];
11309 if (cur == 0) {
11310 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11311 ctxt->sax->setDocumentLocator(ctxt->userData,
11312 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011313 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011314 ctxt->instate = XML_PARSER_EOF;
11315#ifdef DEBUG_PUSH
11316 xmlGenericError(xmlGenericErrorContext,
11317 "PP: entering EOF\n");
11318#endif
11319 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11320 ctxt->sax->endDocument(ctxt->userData);
11321 goto done;
11322 }
11323 if ((cur == '<') && (next == '?')) {
11324 /* PI or XML decl */
11325 if (avail < 5) return(ret);
11326 if ((!terminate) &&
11327 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11328 return(ret);
11329 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11330 ctxt->sax->setDocumentLocator(ctxt->userData,
11331 &xmlDefaultSAXLocator);
11332 if ((ctxt->input->cur[2] == 'x') &&
11333 (ctxt->input->cur[3] == 'm') &&
11334 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011335 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011336 ret += 5;
11337#ifdef DEBUG_PUSH
11338 xmlGenericError(xmlGenericErrorContext,
11339 "PP: Parsing XML Decl\n");
11340#endif
11341 xmlParseXMLDecl(ctxt);
11342 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11343 /*
11344 * The XML REC instructs us to stop parsing right
11345 * here
11346 */
11347 ctxt->instate = XML_PARSER_EOF;
11348 return(0);
11349 }
11350 ctxt->standalone = ctxt->input->standalone;
11351 if ((ctxt->encoding == NULL) &&
11352 (ctxt->input->encoding != NULL))
11353 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11354 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11355 (!ctxt->disableSAX))
11356 ctxt->sax->startDocument(ctxt->userData);
11357 ctxt->instate = XML_PARSER_MISC;
11358#ifdef DEBUG_PUSH
11359 xmlGenericError(xmlGenericErrorContext,
11360 "PP: entering MISC\n");
11361#endif
11362 } else {
11363 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11364 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11365 (!ctxt->disableSAX))
11366 ctxt->sax->startDocument(ctxt->userData);
11367 ctxt->instate = XML_PARSER_MISC;
11368#ifdef DEBUG_PUSH
11369 xmlGenericError(xmlGenericErrorContext,
11370 "PP: entering MISC\n");
11371#endif
11372 }
11373 } else {
11374 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11375 ctxt->sax->setDocumentLocator(ctxt->userData,
11376 &xmlDefaultSAXLocator);
11377 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011378 if (ctxt->version == NULL) {
11379 xmlErrMemory(ctxt, NULL);
11380 break;
11381 }
Owen Taylor3473f882001-02-23 17:55:21 +000011382 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11383 (!ctxt->disableSAX))
11384 ctxt->sax->startDocument(ctxt->userData);
11385 ctxt->instate = XML_PARSER_MISC;
11386#ifdef DEBUG_PUSH
11387 xmlGenericError(xmlGenericErrorContext,
11388 "PP: entering MISC\n");
11389#endif
11390 }
11391 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011392 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011393 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011394 const xmlChar *prefix = NULL;
11395 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011396 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011397
11398 if ((avail < 2) && (ctxt->inputNr == 1))
11399 goto done;
11400 cur = ctxt->input->cur[0];
11401 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011402 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011403 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011404 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11405 ctxt->sax->endDocument(ctxt->userData);
11406 goto done;
11407 }
11408 if (!terminate) {
11409 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011410 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011411 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011412 goto done;
11413 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11414 goto done;
11415 }
11416 }
11417 if (ctxt->spaceNr == 0)
11418 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011419 else if (*ctxt->space == -2)
11420 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011421 else
11422 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011423#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011424 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011425#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011426 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011427#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011428 else
11429 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011430#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011431 if (ctxt->instate == XML_PARSER_EOF)
11432 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011433 if (name == NULL) {
11434 spacePop(ctxt);
11435 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011436 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11437 ctxt->sax->endDocument(ctxt->userData);
11438 goto done;
11439 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011440#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011441 /*
11442 * [ VC: Root Element Type ]
11443 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011444 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011445 */
11446 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11447 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11448 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011449#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011450
11451 /*
11452 * Check for an Empty Element.
11453 */
11454 if ((RAW == '/') && (NXT(1) == '>')) {
11455 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011456
11457 if (ctxt->sax2) {
11458 if ((ctxt->sax != NULL) &&
11459 (ctxt->sax->endElementNs != NULL) &&
11460 (!ctxt->disableSAX))
11461 ctxt->sax->endElementNs(ctxt->userData, name,
11462 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011463 if (ctxt->nsNr - nsNr > 0)
11464 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011465#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011466 } else {
11467 if ((ctxt->sax != NULL) &&
11468 (ctxt->sax->endElement != NULL) &&
11469 (!ctxt->disableSAX))
11470 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011471#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011472 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011473 if (ctxt->instate == XML_PARSER_EOF)
11474 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011475 spacePop(ctxt);
11476 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011477 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011478 } else {
11479 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011480 }
Daniel Veillard65686452012-07-19 18:25:01 +080011481 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011482 break;
11483 }
11484 if (RAW == '>') {
11485 NEXT;
11486 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011487 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011488 "Couldn't find end of Start Tag %s\n",
11489 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011490 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011491 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011492 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011493 if (ctxt->sax2)
11494 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011495#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011496 else
11497 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011498#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011499
Daniel Veillarda880b122003-04-21 21:36:41 +000011500 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011501 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011502 break;
11503 }
11504 case XML_PARSER_CONTENT: {
11505 const xmlChar *test;
11506 unsigned int cons;
11507 if ((avail < 2) && (ctxt->inputNr == 1))
11508 goto done;
11509 cur = ctxt->input->cur[0];
11510 next = ctxt->input->cur[1];
11511
11512 test = CUR_PTR;
11513 cons = ctxt->input->consumed;
11514 if ((cur == '<') && (next == '/')) {
11515 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011516 break;
11517 } else if ((cur == '<') && (next == '?')) {
11518 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011519 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11520 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011521 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011522 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011523 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011524 ctxt->instate = XML_PARSER_CONTENT;
11525 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011526 } else if ((cur == '<') && (next != '!')) {
11527 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011528 break;
11529 } else if ((cur == '<') && (next == '!') &&
11530 (ctxt->input->cur[2] == '-') &&
11531 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011532 int term;
11533
11534 if (avail < 4)
11535 goto done;
11536 ctxt->input->cur += 4;
11537 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11538 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011539 if ((!terminate) && (term < 0)) {
11540 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011541 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011542 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011543 xmlParseComment(ctxt);
11544 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011545 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011546 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11547 (ctxt->input->cur[2] == '[') &&
11548 (ctxt->input->cur[3] == 'C') &&
11549 (ctxt->input->cur[4] == 'D') &&
11550 (ctxt->input->cur[5] == 'A') &&
11551 (ctxt->input->cur[6] == 'T') &&
11552 (ctxt->input->cur[7] == 'A') &&
11553 (ctxt->input->cur[8] == '[')) {
11554 SKIP(9);
11555 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011556 break;
11557 } else if ((cur == '<') && (next == '!') &&
11558 (avail < 9)) {
11559 goto done;
11560 } else if (cur == '&') {
11561 if ((!terminate) &&
11562 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11563 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011564 xmlParseReference(ctxt);
11565 } else {
11566 /* TODO Avoid the extra copy, handle directly !!! */
11567 /*
11568 * Goal of the following test is:
11569 * - minimize calls to the SAX 'character' callback
11570 * when they are mergeable
11571 * - handle a problem for isBlank when we only parse
11572 * a sequence of blank chars and the next one is
11573 * not available to check against '<' presence.
11574 * - tries to homogenize the differences in SAX
11575 * callbacks between the push and pull versions
11576 * of the parser.
11577 */
11578 if ((ctxt->inputNr == 1) &&
11579 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11580 if (!terminate) {
11581 if (ctxt->progressive) {
11582 if ((lastlt == NULL) ||
11583 (ctxt->input->cur > lastlt))
11584 goto done;
11585 } else if (xmlParseLookupSequence(ctxt,
11586 '<', 0, 0) < 0) {
11587 goto done;
11588 }
11589 }
11590 }
11591 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011592 xmlParseCharData(ctxt, 0);
11593 }
11594 /*
11595 * Pop-up of finished entities.
11596 */
11597 while ((RAW == 0) && (ctxt->inputNr > 1))
11598 xmlPopInput(ctxt);
11599 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011600 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11601 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011602 ctxt->instate = XML_PARSER_EOF;
11603 break;
11604 }
11605 break;
11606 }
11607 case XML_PARSER_END_TAG:
11608 if (avail < 2)
11609 goto done;
11610 if (!terminate) {
11611 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011612 /* > can be found unescaped in attribute values */
11613 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011614 goto done;
11615 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11616 goto done;
11617 }
11618 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011619 if (ctxt->sax2) {
11620 xmlParseEndTag2(ctxt,
11621 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11622 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011623 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011624 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011625 }
11626#ifdef LIBXML_SAX1_ENABLED
11627 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011628 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011629#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011630 if (ctxt->instate == XML_PARSER_EOF) {
11631 /* Nothing */
11632 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011633 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011634 } else {
11635 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011636 }
11637 break;
11638 case XML_PARSER_CDATA_SECTION: {
11639 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011640 * The Push mode needs to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011641 * cdataBlock merge back contiguous callbacks.
11642 */
11643 int base;
11644
11645 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11646 if (base < 0) {
11647 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011648 int tmp;
11649
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011650 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011651 XML_PARSER_BIG_BUFFER_SIZE);
11652 if (tmp < 0) {
11653 tmp = -tmp;
11654 ctxt->input->cur += tmp;
11655 goto encoding_error;
11656 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011657 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11658 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011659 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011660 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011661 else if (ctxt->sax->characters != NULL)
11662 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011663 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011664 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011665 if (ctxt->instate == XML_PARSER_EOF)
11666 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011667 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011668 ctxt->checkIndex = 0;
11669 }
11670 goto done;
11671 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011672 int tmp;
11673
11674 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11675 if ((tmp < 0) || (tmp != base)) {
11676 tmp = -tmp;
11677 ctxt->input->cur += tmp;
11678 goto encoding_error;
11679 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011680 if ((ctxt->sax != NULL) && (base == 0) &&
11681 (ctxt->sax->cdataBlock != NULL) &&
11682 (!ctxt->disableSAX)) {
11683 /*
11684 * Special case to provide identical behaviour
11685 * between pull and push parsers on empty CDATA
11686 * sections
11687 */
11688 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11689 (!strncmp((const char *)&ctxt->input->cur[-9],
11690 "<![CDATA[", 9)))
11691 ctxt->sax->cdataBlock(ctxt->userData,
11692 BAD_CAST "", 0);
11693 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011694 (!ctxt->disableSAX)) {
11695 if (ctxt->sax->cdataBlock != NULL)
11696 ctxt->sax->cdataBlock(ctxt->userData,
11697 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011698 else if (ctxt->sax->characters != NULL)
11699 ctxt->sax->characters(ctxt->userData,
11700 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011701 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011702 if (ctxt->instate == XML_PARSER_EOF)
11703 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011704 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011705 ctxt->checkIndex = 0;
11706 ctxt->instate = XML_PARSER_CONTENT;
11707#ifdef DEBUG_PUSH
11708 xmlGenericError(xmlGenericErrorContext,
11709 "PP: entering CONTENT\n");
11710#endif
11711 }
11712 break;
11713 }
Owen Taylor3473f882001-02-23 17:55:21 +000011714 case XML_PARSER_MISC:
11715 SKIP_BLANKS;
11716 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011717 avail = ctxt->input->length -
11718 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011719 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011720 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011721 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011722 if (avail < 2)
11723 goto done;
11724 cur = ctxt->input->cur[0];
11725 next = ctxt->input->cur[1];
11726 if ((cur == '<') && (next == '?')) {
11727 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011728 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11729 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011730 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011731 }
Owen Taylor3473f882001-02-23 17:55:21 +000011732#ifdef DEBUG_PUSH
11733 xmlGenericError(xmlGenericErrorContext,
11734 "PP: Parsing PI\n");
11735#endif
11736 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011737 if (ctxt->instate == XML_PARSER_EOF)
11738 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011739 ctxt->instate = XML_PARSER_MISC;
11740 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011741 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011742 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011743 (ctxt->input->cur[2] == '-') &&
11744 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011745 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011746 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11747 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011748 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011749 }
Owen Taylor3473f882001-02-23 17:55:21 +000011750#ifdef DEBUG_PUSH
11751 xmlGenericError(xmlGenericErrorContext,
11752 "PP: Parsing Comment\n");
11753#endif
11754 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011755 if (ctxt->instate == XML_PARSER_EOF)
11756 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011757 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011758 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011759 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011760 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011761 (ctxt->input->cur[2] == 'D') &&
11762 (ctxt->input->cur[3] == 'O') &&
11763 (ctxt->input->cur[4] == 'C') &&
11764 (ctxt->input->cur[5] == 'T') &&
11765 (ctxt->input->cur[6] == 'Y') &&
11766 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011767 (ctxt->input->cur[8] == 'E')) {
11768 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011769 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11770 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011771 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011772 }
Owen Taylor3473f882001-02-23 17:55:21 +000011773#ifdef DEBUG_PUSH
11774 xmlGenericError(xmlGenericErrorContext,
11775 "PP: Parsing internal subset\n");
11776#endif
11777 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011778 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011779 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011780 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011781 if (ctxt->instate == XML_PARSER_EOF)
11782 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011783 if (RAW == '[') {
11784 ctxt->instate = XML_PARSER_DTD;
11785#ifdef DEBUG_PUSH
11786 xmlGenericError(xmlGenericErrorContext,
11787 "PP: entering DTD\n");
11788#endif
11789 } else {
11790 /*
11791 * Create and update the external subset.
11792 */
11793 ctxt->inSubset = 2;
11794 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11795 (ctxt->sax->externalSubset != NULL))
11796 ctxt->sax->externalSubset(ctxt->userData,
11797 ctxt->intSubName, ctxt->extSubSystem,
11798 ctxt->extSubURI);
11799 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011800 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011801 ctxt->instate = XML_PARSER_PROLOG;
11802#ifdef DEBUG_PUSH
11803 xmlGenericError(xmlGenericErrorContext,
11804 "PP: entering PROLOG\n");
11805#endif
11806 }
11807 } else if ((cur == '<') && (next == '!') &&
11808 (avail < 9)) {
11809 goto done;
11810 } else {
11811 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011812 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011813 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011814#ifdef DEBUG_PUSH
11815 xmlGenericError(xmlGenericErrorContext,
11816 "PP: entering START_TAG\n");
11817#endif
11818 }
11819 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011820 case XML_PARSER_PROLOG:
11821 SKIP_BLANKS;
11822 if (ctxt->input->buf == NULL)
11823 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11824 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011825 avail = xmlBufUse(ctxt->input->buf->buffer) -
11826 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011827 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011828 goto done;
11829 cur = ctxt->input->cur[0];
11830 next = ctxt->input->cur[1];
11831 if ((cur == '<') && (next == '?')) {
11832 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011833 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11834 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011835 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011836 }
Owen Taylor3473f882001-02-23 17:55:21 +000011837#ifdef DEBUG_PUSH
11838 xmlGenericError(xmlGenericErrorContext,
11839 "PP: Parsing PI\n");
11840#endif
11841 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011842 if (ctxt->instate == XML_PARSER_EOF)
11843 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011844 ctxt->instate = XML_PARSER_PROLOG;
11845 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011846 } else if ((cur == '<') && (next == '!') &&
11847 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11848 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011849 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11850 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011851 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011852 }
Owen Taylor3473f882001-02-23 17:55:21 +000011853#ifdef DEBUG_PUSH
11854 xmlGenericError(xmlGenericErrorContext,
11855 "PP: Parsing Comment\n");
11856#endif
11857 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011858 if (ctxt->instate == XML_PARSER_EOF)
11859 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011860 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011861 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011862 } else if ((cur == '<') && (next == '!') &&
11863 (avail < 4)) {
11864 goto done;
11865 } else {
11866 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011867 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011868 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011869 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011870#ifdef DEBUG_PUSH
11871 xmlGenericError(xmlGenericErrorContext,
11872 "PP: entering START_TAG\n");
11873#endif
11874 }
11875 break;
11876 case XML_PARSER_EPILOG:
11877 SKIP_BLANKS;
11878 if (ctxt->input->buf == NULL)
11879 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11880 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011881 avail = xmlBufUse(ctxt->input->buf->buffer) -
11882 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011883 if (avail < 2)
11884 goto done;
11885 cur = ctxt->input->cur[0];
11886 next = ctxt->input->cur[1];
11887 if ((cur == '<') && (next == '?')) {
11888 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011889 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11890 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011891 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011892 }
Owen Taylor3473f882001-02-23 17:55:21 +000011893#ifdef DEBUG_PUSH
11894 xmlGenericError(xmlGenericErrorContext,
11895 "PP: Parsing PI\n");
11896#endif
11897 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011898 if (ctxt->instate == XML_PARSER_EOF)
11899 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011900 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011901 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011902 } else if ((cur == '<') && (next == '!') &&
11903 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11904 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011905 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11906 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011907 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011908 }
Owen Taylor3473f882001-02-23 17:55:21 +000011909#ifdef DEBUG_PUSH
11910 xmlGenericError(xmlGenericErrorContext,
11911 "PP: Parsing Comment\n");
11912#endif
11913 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011914 if (ctxt->instate == XML_PARSER_EOF)
11915 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011916 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011917 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011918 } else if ((cur == '<') && (next == '!') &&
11919 (avail < 4)) {
11920 goto done;
11921 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011922 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011923 ctxt->instate = XML_PARSER_EOF;
11924#ifdef DEBUG_PUSH
11925 xmlGenericError(xmlGenericErrorContext,
11926 "PP: entering EOF\n");
11927#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011928 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011929 ctxt->sax->endDocument(ctxt->userData);
11930 goto done;
11931 }
11932 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011933 case XML_PARSER_DTD: {
11934 /*
11935 * Sorry but progressive parsing of the internal subset
11936 * is not expected to be supported. We first check that
11937 * the full content of the internal subset is available and
11938 * the parsing is launched only at that point.
11939 * Internal subset ends up with "']' S? '>'" in an unescaped
11940 * section and not in a ']]>' sequence which are conditional
11941 * sections (whoever argued to keep that crap in XML deserve
11942 * a place in hell !).
11943 */
11944 int base, i;
11945 xmlChar *buf;
11946 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011947 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011948
11949 base = ctxt->input->cur - ctxt->input->base;
11950 if (base < 0) return(0);
11951 if (ctxt->checkIndex > base)
11952 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011953 buf = xmlBufContent(ctxt->input->buf->buffer);
11954 use = xmlBufUse(ctxt->input->buf->buffer);
11955 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011956 if (quote != 0) {
11957 if (buf[base] == quote)
11958 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011959 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011960 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011961 if ((quote == 0) && (buf[base] == '<')) {
11962 int found = 0;
11963 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011964 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011965 (buf[base + 1] == '!') &&
11966 (buf[base + 2] == '-') &&
11967 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011968 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011969 if ((buf[base] == '-') &&
11970 (buf[base + 1] == '-') &&
11971 (buf[base + 2] == '>')) {
11972 found = 1;
11973 base += 2;
11974 break;
11975 }
11976 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011977 if (!found) {
11978#if 0
11979 fprintf(stderr, "unfinished comment\n");
11980#endif
11981 break; /* for */
11982 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011983 continue;
11984 }
11985 }
Owen Taylor3473f882001-02-23 17:55:21 +000011986 if (buf[base] == '"') {
11987 quote = '"';
11988 continue;
11989 }
11990 if (buf[base] == '\'') {
11991 quote = '\'';
11992 continue;
11993 }
11994 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011995#if 0
11996 fprintf(stderr, "%c%c%c%c: ", buf[base],
11997 buf[base + 1], buf[base + 2], buf[base + 3]);
11998#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011999 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012000 break;
12001 if (buf[base + 1] == ']') {
12002 /* conditional crap, skip both ']' ! */
12003 base++;
12004 continue;
12005 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012006 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012007 if (buf[base + i] == '>') {
12008#if 0
12009 fprintf(stderr, "found\n");
12010#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012011 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012012 }
12013 if (!IS_BLANK_CH(buf[base + i])) {
12014#if 0
12015 fprintf(stderr, "not found\n");
12016#endif
12017 goto not_end_of_int_subset;
12018 }
Owen Taylor3473f882001-02-23 17:55:21 +000012019 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012020#if 0
12021 fprintf(stderr, "end of stream\n");
12022#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012023 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012024
Owen Taylor3473f882001-02-23 17:55:21 +000012025 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012026not_end_of_int_subset:
12027 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012028 }
12029 /*
12030 * We didn't find the end of the Internal subset
12031 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012032 if (quote == 0)
12033 ctxt->checkIndex = base;
12034 else
12035 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012036#ifdef DEBUG_PUSH
12037 if (next == 0)
12038 xmlGenericError(xmlGenericErrorContext,
12039 "PP: lookup of int subset end filed\n");
12040#endif
12041 goto done;
12042
12043found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012044 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012045 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012046 if (ctxt->instate == XML_PARSER_EOF)
12047 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012048 ctxt->inSubset = 2;
12049 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12050 (ctxt->sax->externalSubset != NULL))
12051 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12052 ctxt->extSubSystem, ctxt->extSubURI);
12053 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012054 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012055 if (ctxt->instate == XML_PARSER_EOF)
12056 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012057 ctxt->instate = XML_PARSER_PROLOG;
12058 ctxt->checkIndex = 0;
12059#ifdef DEBUG_PUSH
12060 xmlGenericError(xmlGenericErrorContext,
12061 "PP: entering PROLOG\n");
12062#endif
12063 break;
12064 }
12065 case XML_PARSER_COMMENT:
12066 xmlGenericError(xmlGenericErrorContext,
12067 "PP: internal error, state == COMMENT\n");
12068 ctxt->instate = XML_PARSER_CONTENT;
12069#ifdef DEBUG_PUSH
12070 xmlGenericError(xmlGenericErrorContext,
12071 "PP: entering CONTENT\n");
12072#endif
12073 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012074 case XML_PARSER_IGNORE:
12075 xmlGenericError(xmlGenericErrorContext,
12076 "PP: internal error, state == IGNORE");
12077 ctxt->instate = XML_PARSER_DTD;
12078#ifdef DEBUG_PUSH
12079 xmlGenericError(xmlGenericErrorContext,
12080 "PP: entering DTD\n");
12081#endif
12082 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012083 case XML_PARSER_PI:
12084 xmlGenericError(xmlGenericErrorContext,
12085 "PP: internal error, state == PI\n");
12086 ctxt->instate = XML_PARSER_CONTENT;
12087#ifdef DEBUG_PUSH
12088 xmlGenericError(xmlGenericErrorContext,
12089 "PP: entering CONTENT\n");
12090#endif
12091 break;
12092 case XML_PARSER_ENTITY_DECL:
12093 xmlGenericError(xmlGenericErrorContext,
12094 "PP: internal error, state == ENTITY_DECL\n");
12095 ctxt->instate = XML_PARSER_DTD;
12096#ifdef DEBUG_PUSH
12097 xmlGenericError(xmlGenericErrorContext,
12098 "PP: entering DTD\n");
12099#endif
12100 break;
12101 case XML_PARSER_ENTITY_VALUE:
12102 xmlGenericError(xmlGenericErrorContext,
12103 "PP: internal error, state == ENTITY_VALUE\n");
12104 ctxt->instate = XML_PARSER_CONTENT;
12105#ifdef DEBUG_PUSH
12106 xmlGenericError(xmlGenericErrorContext,
12107 "PP: entering DTD\n");
12108#endif
12109 break;
12110 case XML_PARSER_ATTRIBUTE_VALUE:
12111 xmlGenericError(xmlGenericErrorContext,
12112 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12113 ctxt->instate = XML_PARSER_START_TAG;
12114#ifdef DEBUG_PUSH
12115 xmlGenericError(xmlGenericErrorContext,
12116 "PP: entering START_TAG\n");
12117#endif
12118 break;
12119 case XML_PARSER_SYSTEM_LITERAL:
12120 xmlGenericError(xmlGenericErrorContext,
12121 "PP: internal error, state == SYSTEM_LITERAL\n");
12122 ctxt->instate = XML_PARSER_START_TAG;
12123#ifdef DEBUG_PUSH
12124 xmlGenericError(xmlGenericErrorContext,
12125 "PP: entering START_TAG\n");
12126#endif
12127 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012128 case XML_PARSER_PUBLIC_LITERAL:
12129 xmlGenericError(xmlGenericErrorContext,
12130 "PP: internal error, state == PUBLIC_LITERAL\n");
12131 ctxt->instate = XML_PARSER_START_TAG;
12132#ifdef DEBUG_PUSH
12133 xmlGenericError(xmlGenericErrorContext,
12134 "PP: entering START_TAG\n");
12135#endif
12136 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012137 }
12138 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012139done:
Owen Taylor3473f882001-02-23 17:55:21 +000012140#ifdef DEBUG_PUSH
12141 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12142#endif
12143 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012144encoding_error:
12145 {
12146 char buffer[150];
12147
12148 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12149 ctxt->input->cur[0], ctxt->input->cur[1],
12150 ctxt->input->cur[2], ctxt->input->cur[3]);
12151 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12152 "Input is not proper UTF-8, indicate encoding !\n%s",
12153 BAD_CAST buffer, NULL);
12154 }
12155 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012156}
12157
12158/**
Daniel Veillard65686452012-07-19 18:25:01 +080012159 * xmlParseCheckTransition:
12160 * @ctxt: an XML parser context
12161 * @chunk: a char array
12162 * @size: the size in byte of the chunk
12163 *
12164 * Check depending on the current parser state if the chunk given must be
12165 * processed immediately or one need more data to advance on parsing.
12166 *
12167 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12168 */
12169static int
12170xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12171 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12172 return(-1);
12173 if (ctxt->instate == XML_PARSER_START_TAG) {
12174 if (memchr(chunk, '>', size) != NULL)
12175 return(1);
12176 return(0);
12177 }
12178 if (ctxt->progressive == XML_PARSER_COMMENT) {
12179 if (memchr(chunk, '>', size) != NULL)
12180 return(1);
12181 return(0);
12182 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012183 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12184 if (memchr(chunk, '>', size) != NULL)
12185 return(1);
12186 return(0);
12187 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012188 if (ctxt->progressive == XML_PARSER_PI) {
12189 if (memchr(chunk, '>', size) != NULL)
12190 return(1);
12191 return(0);
12192 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012193 if (ctxt->instate == XML_PARSER_END_TAG) {
12194 if (memchr(chunk, '>', size) != NULL)
12195 return(1);
12196 return(0);
12197 }
12198 if ((ctxt->progressive == XML_PARSER_DTD) ||
12199 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012200 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012201 return(1);
12202 return(0);
12203 }
Daniel Veillard65686452012-07-19 18:25:01 +080012204 return(1);
12205}
12206
12207/**
Owen Taylor3473f882001-02-23 17:55:21 +000012208 * xmlParseChunk:
12209 * @ctxt: an XML parser context
12210 * @chunk: an char array
12211 * @size: the size in byte of the chunk
12212 * @terminate: last chunk indicator
12213 *
12214 * Parse a Chunk of memory
12215 *
12216 * Returns zero if no error, the xmlParserErrors otherwise.
12217 */
12218int
12219xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12220 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012221 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012222 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012223 size_t old_avail = 0;
12224 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012225
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012226 if (ctxt == NULL)
12227 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012228 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012229 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012230 if (ctxt->instate == XML_PARSER_EOF)
12231 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012232 if (ctxt->instate == XML_PARSER_START)
12233 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012234 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12235 (chunk[size - 1] == '\r')) {
12236 end_in_lf = 1;
12237 size--;
12238 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012239
12240xmldecl_done:
12241
Owen Taylor3473f882001-02-23 17:55:21 +000012242 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12243 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012244 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12245 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012246 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012247
Daniel Veillard65686452012-07-19 18:25:01 +080012248 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012249 /*
12250 * Specific handling if we autodetected an encoding, we should not
12251 * push more than the first line ... which depend on the encoding
12252 * And only push the rest once the final encoding was detected
12253 */
12254 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12255 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012256 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012257
12258 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12259 BAD_CAST "UTF-16")) ||
12260 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12261 BAD_CAST "UTF16")))
12262 len = 90;
12263 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12264 BAD_CAST "UCS-4")) ||
12265 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12266 BAD_CAST "UCS4")))
12267 len = 180;
12268
12269 if (ctxt->input->buf->rawconsumed < len)
12270 len -= ctxt->input->buf->rawconsumed;
12271
Raul Hudeaba9716a2010-03-15 10:13:29 +010012272 /*
12273 * Change size for reading the initial declaration only
12274 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12275 * will blindly copy extra bytes from memory.
12276 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012277 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012278 remain = size - len;
12279 size = len;
12280 } else {
12281 remain = 0;
12282 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012283 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012284 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012285 if (res < 0) {
12286 ctxt->errNo = XML_PARSER_EOF;
12287 ctxt->disableSAX = 1;
12288 return (XML_PARSER_EOF);
12289 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012290 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012291#ifdef DEBUG_PUSH
12292 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12293#endif
12294
Owen Taylor3473f882001-02-23 17:55:21 +000012295 } else if (ctxt->instate != XML_PARSER_EOF) {
12296 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12297 xmlParserInputBufferPtr in = ctxt->input->buf;
12298 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12299 (in->raw != NULL)) {
12300 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012301 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12302 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012303
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012304 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012305 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012306 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012307 xmlGenericError(xmlGenericErrorContext,
12308 "xmlParseChunk: encoder error\n");
12309 return(XML_ERR_INVALID_ENCODING);
12310 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012311 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012312 }
12313 }
12314 }
Daniel Veillard65686452012-07-19 18:25:01 +080012315 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012316 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012317 } else {
12318 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12319 avail = xmlBufUse(ctxt->input->buf->buffer);
12320 /*
12321 * Depending on the current state it may not be such
12322 * a good idea to try parsing if there is nothing in the chunk
12323 * which would be worth doing a parser state transition and we
12324 * need to wait for more data
12325 */
12326 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12327 (old_avail == 0) || (avail == 0) ||
12328 (xmlParseCheckTransition(ctxt,
12329 (const char *)&ctxt->input->base[old_avail],
12330 avail - old_avail)))
12331 xmlParseTryOrFinish(ctxt, terminate);
12332 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012333 if (ctxt->instate == XML_PARSER_EOF)
12334 return(ctxt->errNo);
12335
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012336 if ((ctxt->input != NULL) &&
12337 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12338 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12339 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12340 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12341 ctxt->instate = XML_PARSER_EOF;
12342 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012343 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12344 return(ctxt->errNo);
12345
12346 if (remain != 0) {
12347 chunk += size;
12348 size = remain;
12349 remain = 0;
12350 goto xmldecl_done;
12351 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012352 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12353 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012354 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12355 ctxt->input);
12356 size_t current = ctxt->input->cur - ctxt->input->base;
12357
Daniel Veillarda617e242006-01-09 14:38:44 +000012358 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012359
12360 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12361 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012362 }
Owen Taylor3473f882001-02-23 17:55:21 +000012363 if (terminate) {
12364 /*
12365 * Check for termination
12366 */
Daniel Veillard65686452012-07-19 18:25:01 +080012367 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012368
12369 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012370 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012371 cur_avail = ctxt->input->length -
12372 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012373 else
Daniel Veillard65686452012-07-19 18:25:01 +080012374 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12375 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012376 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012377
Owen Taylor3473f882001-02-23 17:55:21 +000012378 if ((ctxt->instate != XML_PARSER_EOF) &&
12379 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012380 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012381 }
Daniel Veillard65686452012-07-19 18:25:01 +080012382 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012383 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012384 }
Owen Taylor3473f882001-02-23 17:55:21 +000012385 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012386 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012387 ctxt->sax->endDocument(ctxt->userData);
12388 }
12389 ctxt->instate = XML_PARSER_EOF;
12390 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012391 if (ctxt->wellFormed == 0)
12392 return((xmlParserErrors) ctxt->errNo);
12393 else
12394 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012395}
12396
12397/************************************************************************
12398 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012399 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012400 * *
12401 ************************************************************************/
12402
12403/**
Owen Taylor3473f882001-02-23 17:55:21 +000012404 * xmlCreatePushParserCtxt:
12405 * @sax: a SAX handler
12406 * @user_data: The user data returned on SAX callbacks
12407 * @chunk: a pointer to an array of chars
12408 * @size: number of chars in the array
12409 * @filename: an optional file name or URI
12410 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012411 * Create a parser context for using the XML parser in push mode.
12412 * If @buffer and @size are non-NULL, the data is used to detect
12413 * the encoding. The remaining characters will be parsed so they
12414 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012415 * To allow content encoding detection, @size should be >= 4
12416 * The value of @filename is used for fetching external entities
12417 * and error/warning reports.
12418 *
12419 * Returns the new parser context or NULL
12420 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012421
Owen Taylor3473f882001-02-23 17:55:21 +000012422xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012423xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012424 const char *chunk, int size, const char *filename) {
12425 xmlParserCtxtPtr ctxt;
12426 xmlParserInputPtr inputStream;
12427 xmlParserInputBufferPtr buf;
12428 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12429
12430 /*
12431 * plug some encoding conversion routines
12432 */
12433 if ((chunk != NULL) && (size >= 4))
12434 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12435
12436 buf = xmlAllocParserInputBuffer(enc);
12437 if (buf == NULL) return(NULL);
12438
12439 ctxt = xmlNewParserCtxt();
12440 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012441 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012442 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012443 return(NULL);
12444 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012445 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012446 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12447 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012448 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012449 xmlFreeParserInputBuffer(buf);
12450 xmlFreeParserCtxt(ctxt);
12451 return(NULL);
12452 }
Owen Taylor3473f882001-02-23 17:55:21 +000012453 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012454#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012455 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012456#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012457 xmlFree(ctxt->sax);
12458 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12459 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012460 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012461 xmlFreeParserInputBuffer(buf);
12462 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012463 return(NULL);
12464 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012465 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12466 if (sax->initialized == XML_SAX2_MAGIC)
12467 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12468 else
12469 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012470 if (user_data != NULL)
12471 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012472 }
Owen Taylor3473f882001-02-23 17:55:21 +000012473 if (filename == NULL) {
12474 ctxt->directory = NULL;
12475 } else {
12476 ctxt->directory = xmlParserGetDirectory(filename);
12477 }
12478
12479 inputStream = xmlNewInputStream(ctxt);
12480 if (inputStream == NULL) {
12481 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012482 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012483 return(NULL);
12484 }
12485
12486 if (filename == NULL)
12487 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012488 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012489 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012490 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012491 if (inputStream->filename == NULL) {
12492 xmlFreeParserCtxt(ctxt);
12493 xmlFreeParserInputBuffer(buf);
12494 return(NULL);
12495 }
12496 }
Owen Taylor3473f882001-02-23 17:55:21 +000012497 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012498 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012499 inputPush(ctxt, inputStream);
12500
William M. Brack3a1cd212005-02-11 14:35:54 +000012501 /*
12502 * If the caller didn't provide an initial 'chunk' for determining
12503 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12504 * that it can be automatically determined later
12505 */
12506 if ((size == 0) || (chunk == NULL)) {
12507 ctxt->charset = XML_CHAR_ENCODING_NONE;
12508 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012509 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12510 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012511
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012512 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012513
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012514 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012515#ifdef DEBUG_PUSH
12516 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12517#endif
12518 }
12519
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012520 if (enc != XML_CHAR_ENCODING_NONE) {
12521 xmlSwitchEncoding(ctxt, enc);
12522 }
12523
Owen Taylor3473f882001-02-23 17:55:21 +000012524 return(ctxt);
12525}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012526#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012527
12528/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012529 * xmlStopParser:
12530 * @ctxt: an XML parser context
12531 *
12532 * Blocks further parser processing
12533 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012534void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012535xmlStopParser(xmlParserCtxtPtr ctxt) {
12536 if (ctxt == NULL)
12537 return;
12538 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarde50ba812013-04-11 15:54:51 +080012539 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012540 ctxt->disableSAX = 1;
12541 if (ctxt->input != NULL) {
12542 ctxt->input->cur = BAD_CAST"";
12543 ctxt->input->base = ctxt->input->cur;
12544 }
12545}
12546
12547/**
Owen Taylor3473f882001-02-23 17:55:21 +000012548 * xmlCreateIOParserCtxt:
12549 * @sax: a SAX handler
12550 * @user_data: The user data returned on SAX callbacks
12551 * @ioread: an I/O read function
12552 * @ioclose: an I/O close function
12553 * @ioctx: an I/O handler
12554 * @enc: the charset encoding if known
12555 *
12556 * Create a parser context for using the XML parser with an existing
12557 * I/O stream
12558 *
12559 * Returns the new parser context or NULL
12560 */
12561xmlParserCtxtPtr
12562xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12563 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12564 void *ioctx, xmlCharEncoding enc) {
12565 xmlParserCtxtPtr ctxt;
12566 xmlParserInputPtr inputStream;
12567 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012568
Daniel Veillard42595322004-11-08 10:52:06 +000012569 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012570
12571 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012572 if (buf == NULL) {
12573 if (ioclose != NULL)
12574 ioclose(ioctx);
12575 return (NULL);
12576 }
Owen Taylor3473f882001-02-23 17:55:21 +000012577
12578 ctxt = xmlNewParserCtxt();
12579 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012580 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012581 return(NULL);
12582 }
12583 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012584#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012585 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012586#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012587 xmlFree(ctxt->sax);
12588 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12589 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012590 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012591 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012592 return(NULL);
12593 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012594 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12595 if (sax->initialized == XML_SAX2_MAGIC)
12596 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12597 else
12598 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012599 if (user_data != NULL)
12600 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012601 }
Owen Taylor3473f882001-02-23 17:55:21 +000012602
12603 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12604 if (inputStream == NULL) {
12605 xmlFreeParserCtxt(ctxt);
12606 return(NULL);
12607 }
12608 inputPush(ctxt, inputStream);
12609
12610 return(ctxt);
12611}
12612
Daniel Veillard4432df22003-09-28 18:58:27 +000012613#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012614/************************************************************************
12615 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012616 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012617 * *
12618 ************************************************************************/
12619
12620/**
12621 * xmlIOParseDTD:
12622 * @sax: the SAX handler block or NULL
12623 * @input: an Input Buffer
12624 * @enc: the charset encoding if known
12625 *
12626 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012627 *
Owen Taylor3473f882001-02-23 17:55:21 +000012628 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012629 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012630 */
12631
12632xmlDtdPtr
12633xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12634 xmlCharEncoding enc) {
12635 xmlDtdPtr ret = NULL;
12636 xmlParserCtxtPtr ctxt;
12637 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012638 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012639
12640 if (input == NULL)
12641 return(NULL);
12642
12643 ctxt = xmlNewParserCtxt();
12644 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012645 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012646 return(NULL);
12647 }
12648
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012649 /* We are loading a DTD */
12650 ctxt->options |= XML_PARSE_DTDLOAD;
12651
Owen Taylor3473f882001-02-23 17:55:21 +000012652 /*
12653 * Set-up the SAX context
12654 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012655 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012656 if (ctxt->sax != NULL)
12657 xmlFree(ctxt->sax);
12658 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012659 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012660 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012661 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012662
12663 /*
12664 * generate a parser input from the I/O handler
12665 */
12666
Daniel Veillard43caefb2003-12-07 19:32:22 +000012667 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012668 if (pinput == NULL) {
12669 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012670 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012671 xmlFreeParserCtxt(ctxt);
12672 return(NULL);
12673 }
12674
12675 /*
12676 * plug some encoding conversion routines here.
12677 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012678 if (xmlPushInput(ctxt, pinput) < 0) {
12679 if (sax != NULL) ctxt->sax = NULL;
12680 xmlFreeParserCtxt(ctxt);
12681 return(NULL);
12682 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012683 if (enc != XML_CHAR_ENCODING_NONE) {
12684 xmlSwitchEncoding(ctxt, enc);
12685 }
Owen Taylor3473f882001-02-23 17:55:21 +000012686
12687 pinput->filename = NULL;
12688 pinput->line = 1;
12689 pinput->col = 1;
12690 pinput->base = ctxt->input->cur;
12691 pinput->cur = ctxt->input->cur;
12692 pinput->free = NULL;
12693
12694 /*
12695 * let's parse that entity knowing it's an external subset.
12696 */
12697 ctxt->inSubset = 2;
12698 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012699 if (ctxt->myDoc == NULL) {
12700 xmlErrMemory(ctxt, "New Doc failed");
12701 return(NULL);
12702 }
12703 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012704 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12705 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012706
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012707 if ((enc == XML_CHAR_ENCODING_NONE) &&
12708 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012709 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012710 * Get the 4 first bytes and decode the charset
12711 * if enc != XML_CHAR_ENCODING_NONE
12712 * plug some encoding conversion routines.
12713 */
12714 start[0] = RAW;
12715 start[1] = NXT(1);
12716 start[2] = NXT(2);
12717 start[3] = NXT(3);
12718 enc = xmlDetectCharEncoding(start, 4);
12719 if (enc != XML_CHAR_ENCODING_NONE) {
12720 xmlSwitchEncoding(ctxt, enc);
12721 }
12722 }
12723
Owen Taylor3473f882001-02-23 17:55:21 +000012724 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12725
12726 if (ctxt->myDoc != NULL) {
12727 if (ctxt->wellFormed) {
12728 ret = ctxt->myDoc->extSubset;
12729 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012730 if (ret != NULL) {
12731 xmlNodePtr tmp;
12732
12733 ret->doc = NULL;
12734 tmp = ret->children;
12735 while (tmp != NULL) {
12736 tmp->doc = NULL;
12737 tmp = tmp->next;
12738 }
12739 }
Owen Taylor3473f882001-02-23 17:55:21 +000012740 } else {
12741 ret = NULL;
12742 }
12743 xmlFreeDoc(ctxt->myDoc);
12744 ctxt->myDoc = NULL;
12745 }
12746 if (sax != NULL) ctxt->sax = NULL;
12747 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012748
Owen Taylor3473f882001-02-23 17:55:21 +000012749 return(ret);
12750}
12751
12752/**
12753 * xmlSAXParseDTD:
12754 * @sax: the SAX handler block
12755 * @ExternalID: a NAME* containing the External ID of the DTD
12756 * @SystemID: a NAME* containing the URL to the DTD
12757 *
12758 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012759 *
Owen Taylor3473f882001-02-23 17:55:21 +000012760 * Returns the resulting xmlDtdPtr or NULL in case of error.
12761 */
12762
12763xmlDtdPtr
12764xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12765 const xmlChar *SystemID) {
12766 xmlDtdPtr ret = NULL;
12767 xmlParserCtxtPtr ctxt;
12768 xmlParserInputPtr input = NULL;
12769 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012770 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012771
12772 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12773
12774 ctxt = xmlNewParserCtxt();
12775 if (ctxt == NULL) {
12776 return(NULL);
12777 }
12778
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012779 /* We are loading a DTD */
12780 ctxt->options |= XML_PARSE_DTDLOAD;
12781
Owen Taylor3473f882001-02-23 17:55:21 +000012782 /*
12783 * Set-up the SAX context
12784 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012785 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012786 if (ctxt->sax != NULL)
12787 xmlFree(ctxt->sax);
12788 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012789 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012790 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012791
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012792 /*
12793 * Canonicalise the system ID
12794 */
12795 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012796 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012797 xmlFreeParserCtxt(ctxt);
12798 return(NULL);
12799 }
Owen Taylor3473f882001-02-23 17:55:21 +000012800
12801 /*
12802 * Ask the Entity resolver to load the damn thing
12803 */
12804
12805 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012806 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12807 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012808 if (input == NULL) {
12809 if (sax != NULL) ctxt->sax = NULL;
12810 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012811 if (systemIdCanonic != NULL)
12812 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012813 return(NULL);
12814 }
12815
12816 /*
12817 * plug some encoding conversion routines here.
12818 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012819 if (xmlPushInput(ctxt, input) < 0) {
12820 if (sax != NULL) ctxt->sax = NULL;
12821 xmlFreeParserCtxt(ctxt);
12822 if (systemIdCanonic != NULL)
12823 xmlFree(systemIdCanonic);
12824 return(NULL);
12825 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012826 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12827 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12828 xmlSwitchEncoding(ctxt, enc);
12829 }
Owen Taylor3473f882001-02-23 17:55:21 +000012830
12831 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012832 input->filename = (char *) systemIdCanonic;
12833 else
12834 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012835 input->line = 1;
12836 input->col = 1;
12837 input->base = ctxt->input->cur;
12838 input->cur = ctxt->input->cur;
12839 input->free = NULL;
12840
12841 /*
12842 * let's parse that entity knowing it's an external subset.
12843 */
12844 ctxt->inSubset = 2;
12845 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012846 if (ctxt->myDoc == NULL) {
12847 xmlErrMemory(ctxt, "New Doc failed");
12848 if (sax != NULL) ctxt->sax = NULL;
12849 xmlFreeParserCtxt(ctxt);
12850 return(NULL);
12851 }
12852 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012853 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12854 ExternalID, SystemID);
12855 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12856
12857 if (ctxt->myDoc != NULL) {
12858 if (ctxt->wellFormed) {
12859 ret = ctxt->myDoc->extSubset;
12860 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012861 if (ret != NULL) {
12862 xmlNodePtr tmp;
12863
12864 ret->doc = NULL;
12865 tmp = ret->children;
12866 while (tmp != NULL) {
12867 tmp->doc = NULL;
12868 tmp = tmp->next;
12869 }
12870 }
Owen Taylor3473f882001-02-23 17:55:21 +000012871 } else {
12872 ret = NULL;
12873 }
12874 xmlFreeDoc(ctxt->myDoc);
12875 ctxt->myDoc = NULL;
12876 }
12877 if (sax != NULL) ctxt->sax = NULL;
12878 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012879
Owen Taylor3473f882001-02-23 17:55:21 +000012880 return(ret);
12881}
12882
Daniel Veillard4432df22003-09-28 18:58:27 +000012883
Owen Taylor3473f882001-02-23 17:55:21 +000012884/**
12885 * xmlParseDTD:
12886 * @ExternalID: a NAME* containing the External ID of the DTD
12887 * @SystemID: a NAME* containing the URL to the DTD
12888 *
12889 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012890 *
Owen Taylor3473f882001-02-23 17:55:21 +000012891 * Returns the resulting xmlDtdPtr or NULL in case of error.
12892 */
12893
12894xmlDtdPtr
12895xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12896 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12897}
Daniel Veillard4432df22003-09-28 18:58:27 +000012898#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012899
12900/************************************************************************
12901 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012902 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012903 * *
12904 ************************************************************************/
12905
12906/**
Owen Taylor3473f882001-02-23 17:55:21 +000012907 * xmlParseCtxtExternalEntity:
12908 * @ctx: the existing parsing context
12909 * @URL: the URL for the entity to load
12910 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012911 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012912 *
12913 * Parse an external general entity within an existing parsing context
12914 * An external general parsed entity is well-formed if it matches the
12915 * production labeled extParsedEnt.
12916 *
12917 * [78] extParsedEnt ::= TextDecl? content
12918 *
12919 * Returns 0 if the entity is well formed, -1 in case of args problem and
12920 * the parser error code otherwise
12921 */
12922
12923int
12924xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012925 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012926 xmlParserCtxtPtr ctxt;
12927 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012928 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012929 xmlSAXHandlerPtr oldsax = NULL;
12930 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012931 xmlChar start[4];
12932 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012933
Daniel Veillardce682bc2004-11-05 17:22:25 +000012934 if (ctx == NULL) return(-1);
12935
Daniel Veillard0161e632008-08-28 15:36:32 +000012936 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12937 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012938 return(XML_ERR_ENTITY_LOOP);
12939 }
12940
Daniel Veillardcda96922001-08-21 10:56:31 +000012941 if (lst != NULL)
12942 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012943 if ((URL == NULL) && (ID == NULL))
12944 return(-1);
12945 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12946 return(-1);
12947
Rob Richards798743a2009-06-19 13:54:25 -040012948 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012949 if (ctxt == NULL) {
12950 return(-1);
12951 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012952
Owen Taylor3473f882001-02-23 17:55:21 +000012953 oldsax = ctxt->sax;
12954 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012955 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012956 newDoc = xmlNewDoc(BAD_CAST "1.0");
12957 if (newDoc == NULL) {
12958 xmlFreeParserCtxt(ctxt);
12959 return(-1);
12960 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012961 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012962 if (ctx->myDoc->dict) {
12963 newDoc->dict = ctx->myDoc->dict;
12964 xmlDictReference(newDoc->dict);
12965 }
Owen Taylor3473f882001-02-23 17:55:21 +000012966 if (ctx->myDoc != NULL) {
12967 newDoc->intSubset = ctx->myDoc->intSubset;
12968 newDoc->extSubset = ctx->myDoc->extSubset;
12969 }
12970 if (ctx->myDoc->URL != NULL) {
12971 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12972 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012973 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12974 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012975 ctxt->sax = oldsax;
12976 xmlFreeParserCtxt(ctxt);
12977 newDoc->intSubset = NULL;
12978 newDoc->extSubset = NULL;
12979 xmlFreeDoc(newDoc);
12980 return(-1);
12981 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012982 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012983 nodePush(ctxt, newDoc->children);
12984 if (ctx->myDoc == NULL) {
12985 ctxt->myDoc = newDoc;
12986 } else {
12987 ctxt->myDoc = ctx->myDoc;
12988 newDoc->children->doc = ctx->myDoc;
12989 }
12990
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012991 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012992 * Get the 4 first bytes and decode the charset
12993 * if enc != XML_CHAR_ENCODING_NONE
12994 * plug some encoding conversion routines.
12995 */
12996 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012997 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12998 start[0] = RAW;
12999 start[1] = NXT(1);
13000 start[2] = NXT(2);
13001 start[3] = NXT(3);
13002 enc = xmlDetectCharEncoding(start, 4);
13003 if (enc != XML_CHAR_ENCODING_NONE) {
13004 xmlSwitchEncoding(ctxt, enc);
13005 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013006 }
13007
Owen Taylor3473f882001-02-23 17:55:21 +000013008 /*
13009 * Parse a possible text declaration first
13010 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013011 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013012 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013013 /*
13014 * An XML-1.0 document can't reference an entity not XML-1.0
13015 */
13016 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13017 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013018 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013019 "Version mismatch between document and entity\n");
13020 }
Owen Taylor3473f882001-02-23 17:55:21 +000013021 }
13022
13023 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013024 * If the user provided its own SAX callbacks then reuse the
13025 * useData callback field, otherwise the expected setup in a
13026 * DOM builder is to have userData == ctxt
13027 */
13028 if (ctx->userData == ctx)
13029 ctxt->userData = ctxt;
13030 else
13031 ctxt->userData = ctx->userData;
13032
13033 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013034 * Doing validity checking on chunk doesn't make sense
13035 */
13036 ctxt->instate = XML_PARSER_CONTENT;
13037 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013038 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013039 ctxt->loadsubset = ctx->loadsubset;
13040 ctxt->depth = ctx->depth + 1;
13041 ctxt->replaceEntities = ctx->replaceEntities;
13042 if (ctxt->validate) {
13043 ctxt->vctxt.error = ctx->vctxt.error;
13044 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013045 } else {
13046 ctxt->vctxt.error = NULL;
13047 ctxt->vctxt.warning = NULL;
13048 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013049 ctxt->vctxt.nodeTab = NULL;
13050 ctxt->vctxt.nodeNr = 0;
13051 ctxt->vctxt.nodeMax = 0;
13052 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013053 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13054 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013055 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13056 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13057 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013058 ctxt->dictNames = ctx->dictNames;
13059 ctxt->attsDefault = ctx->attsDefault;
13060 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013061 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013062
13063 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013064
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013065 ctx->validate = ctxt->validate;
13066 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013067 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013068 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013069 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013070 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013071 }
13072 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013073 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013074 }
13075
13076 if (!ctxt->wellFormed) {
13077 if (ctxt->errNo == 0)
13078 ret = 1;
13079 else
13080 ret = ctxt->errNo;
13081 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013082 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013083 xmlNodePtr cur;
13084
13085 /*
13086 * Return the newly created nodeset after unlinking it from
13087 * they pseudo parent.
13088 */
13089 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013090 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013091 while (cur != NULL) {
13092 cur->parent = NULL;
13093 cur = cur->next;
13094 }
13095 newDoc->children->children = NULL;
13096 }
13097 ret = 0;
13098 }
13099 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013100 ctxt->dict = NULL;
13101 ctxt->attsDefault = NULL;
13102 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013103 xmlFreeParserCtxt(ctxt);
13104 newDoc->intSubset = NULL;
13105 newDoc->extSubset = NULL;
13106 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013107
Owen Taylor3473f882001-02-23 17:55:21 +000013108 return(ret);
13109}
13110
13111/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013112 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013113 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013114 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013115 * @sax: the SAX handler bloc (possibly NULL)
13116 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13117 * @depth: Used for loop detection, use 0
13118 * @URL: the URL for the entity to load
13119 * @ID: the System ID for the entity to load
13120 * @list: the return value for the set of parsed nodes
13121 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013122 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013123 *
13124 * Returns 0 if the entity is well formed, -1 in case of args problem and
13125 * the parser error code otherwise
13126 */
13127
Daniel Veillard7d515752003-09-26 19:12:37 +000013128static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013129xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13130 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013131 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013132 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013133 xmlParserCtxtPtr ctxt;
13134 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013135 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013136 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013137 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013138 xmlChar start[4];
13139 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013140
Daniel Veillard0161e632008-08-28 15:36:32 +000013141 if (((depth > 40) &&
13142 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13143 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013144 return(XML_ERR_ENTITY_LOOP);
13145 }
13146
Owen Taylor3473f882001-02-23 17:55:21 +000013147 if (list != NULL)
13148 *list = NULL;
13149 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013150 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013151 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013152 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013153
13154
Rob Richards9c0aa472009-03-26 18:10:19 +000013155 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013156 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013157 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013158 if (oldctxt != NULL) {
13159 ctxt->_private = oldctxt->_private;
13160 ctxt->loadsubset = oldctxt->loadsubset;
13161 ctxt->validate = oldctxt->validate;
13162 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013163 ctxt->record_info = oldctxt->record_info;
13164 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13165 ctxt->node_seq.length = oldctxt->node_seq.length;
13166 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013167 } else {
13168 /*
13169 * Doing validity checking on chunk without context
13170 * doesn't make sense
13171 */
13172 ctxt->_private = NULL;
13173 ctxt->validate = 0;
13174 ctxt->external = 2;
13175 ctxt->loadsubset = 0;
13176 }
Owen Taylor3473f882001-02-23 17:55:21 +000013177 if (sax != NULL) {
13178 oldsax = ctxt->sax;
13179 ctxt->sax = sax;
13180 if (user_data != NULL)
13181 ctxt->userData = user_data;
13182 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013183 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013184 newDoc = xmlNewDoc(BAD_CAST "1.0");
13185 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013186 ctxt->node_seq.maximum = 0;
13187 ctxt->node_seq.length = 0;
13188 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013189 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013190 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013191 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013192 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013193 newDoc->intSubset = doc->intSubset;
13194 newDoc->extSubset = doc->extSubset;
13195 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013196 xmlDictReference(newDoc->dict);
13197
Owen Taylor3473f882001-02-23 17:55:21 +000013198 if (doc->URL != NULL) {
13199 newDoc->URL = xmlStrdup(doc->URL);
13200 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013201 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13202 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013203 if (sax != NULL)
13204 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013205 ctxt->node_seq.maximum = 0;
13206 ctxt->node_seq.length = 0;
13207 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013208 xmlFreeParserCtxt(ctxt);
13209 newDoc->intSubset = NULL;
13210 newDoc->extSubset = NULL;
13211 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013212 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013213 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013214 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013215 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013216 ctxt->myDoc = doc;
13217 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013218
Daniel Veillard0161e632008-08-28 15:36:32 +000013219 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013220 * Get the 4 first bytes and decode the charset
13221 * if enc != XML_CHAR_ENCODING_NONE
13222 * plug some encoding conversion routines.
13223 */
13224 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013225 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13226 start[0] = RAW;
13227 start[1] = NXT(1);
13228 start[2] = NXT(2);
13229 start[3] = NXT(3);
13230 enc = xmlDetectCharEncoding(start, 4);
13231 if (enc != XML_CHAR_ENCODING_NONE) {
13232 xmlSwitchEncoding(ctxt, enc);
13233 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013234 }
13235
Owen Taylor3473f882001-02-23 17:55:21 +000013236 /*
13237 * Parse a possible text declaration first
13238 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013239 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013240 xmlParseTextDecl(ctxt);
13241 }
13242
Owen Taylor3473f882001-02-23 17:55:21 +000013243 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013244 ctxt->depth = depth;
13245
13246 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013247
Daniel Veillard561b7f82002-03-20 21:55:57 +000013248 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013249 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013250 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013251 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013252 }
13253 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013254 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013255 }
13256
13257 if (!ctxt->wellFormed) {
13258 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013259 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013260 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013261 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013262 } else {
13263 if (list != NULL) {
13264 xmlNodePtr cur;
13265
13266 /*
13267 * Return the newly created nodeset after unlinking it from
13268 * they pseudo parent.
13269 */
13270 cur = newDoc->children->children;
13271 *list = cur;
13272 while (cur != NULL) {
13273 cur->parent = NULL;
13274 cur = cur->next;
13275 }
13276 newDoc->children->children = NULL;
13277 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013278 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013279 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013280
13281 /*
13282 * Record in the parent context the number of entities replacement
13283 * done when parsing that reference.
13284 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013285 if (oldctxt != NULL)
13286 oldctxt->nbentities += ctxt->nbentities;
13287
Daniel Veillard0161e632008-08-28 15:36:32 +000013288 /*
13289 * Also record the size of the entity parsed
13290 */
13291 if (ctxt->input != NULL) {
13292 oldctxt->sizeentities += ctxt->input->consumed;
13293 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13294 }
13295 /*
13296 * And record the last error if any
13297 */
13298 if (ctxt->lastError.code != XML_ERR_OK)
13299 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13300
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013301 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013302 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013303 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13304 oldctxt->node_seq.length = ctxt->node_seq.length;
13305 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013306 ctxt->node_seq.maximum = 0;
13307 ctxt->node_seq.length = 0;
13308 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013309 xmlFreeParserCtxt(ctxt);
13310 newDoc->intSubset = NULL;
13311 newDoc->extSubset = NULL;
13312 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013313
Owen Taylor3473f882001-02-23 17:55:21 +000013314 return(ret);
13315}
13316
Daniel Veillard81273902003-09-30 00:43:48 +000013317#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013318/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013319 * xmlParseExternalEntity:
13320 * @doc: the document the chunk pertains to
13321 * @sax: the SAX handler bloc (possibly NULL)
13322 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13323 * @depth: Used for loop detection, use 0
13324 * @URL: the URL for the entity to load
13325 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013326 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013327 *
13328 * Parse an external general entity
13329 * An external general parsed entity is well-formed if it matches the
13330 * production labeled extParsedEnt.
13331 *
13332 * [78] extParsedEnt ::= TextDecl? content
13333 *
13334 * Returns 0 if the entity is well formed, -1 in case of args problem and
13335 * the parser error code otherwise
13336 */
13337
13338int
13339xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013340 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013341 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013342 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013343}
13344
13345/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013346 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013347 * @doc: the document the chunk pertains to
13348 * @sax: the SAX handler bloc (possibly NULL)
13349 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13350 * @depth: Used for loop detection, use 0
13351 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013352 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013353 *
13354 * Parse a well-balanced chunk of an XML document
13355 * called by the parser
13356 * The allowed sequence for the Well Balanced Chunk is the one defined by
13357 * the content production in the XML grammar:
13358 *
13359 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13360 *
13361 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13362 * the parser error code otherwise
13363 */
13364
13365int
13366xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013367 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013368 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13369 depth, string, lst, 0 );
13370}
Daniel Veillard81273902003-09-30 00:43:48 +000013371#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013372
13373/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013374 * xmlParseBalancedChunkMemoryInternal:
13375 * @oldctxt: the existing parsing context
13376 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13377 * @user_data: the user data field for the parser context
13378 * @lst: the return value for the set of parsed nodes
13379 *
13380 *
13381 * Parse a well-balanced chunk of an XML document
13382 * called by the parser
13383 * The allowed sequence for the Well Balanced Chunk is the one defined by
13384 * the content production in the XML grammar:
13385 *
13386 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13387 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013388 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13389 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013390 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013391 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013392 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013393 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013394static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013395xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13396 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13397 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013398 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013399 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013400 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013401 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013402 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013403 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013404 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013405#ifdef SAX2
13406 int i;
13407#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013408
Daniel Veillard0161e632008-08-28 15:36:32 +000013409 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13410 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013411 return(XML_ERR_ENTITY_LOOP);
13412 }
13413
13414
13415 if (lst != NULL)
13416 *lst = NULL;
13417 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013418 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013419
13420 size = xmlStrlen(string);
13421
13422 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013423 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013424 if (user_data != NULL)
13425 ctxt->userData = user_data;
13426 else
13427 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013428 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13429 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013430 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13431 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13432 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013433
Daniel Veillard74eaec12009-08-26 15:57:20 +020013434#ifdef SAX2
13435 /* propagate namespaces down the entity */
13436 for (i = 0;i < oldctxt->nsNr;i += 2) {
13437 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13438 }
13439#endif
13440
Daniel Veillard328f48c2002-11-15 15:24:34 +000013441 oldsax = ctxt->sax;
13442 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013443 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013444 ctxt->replaceEntities = oldctxt->replaceEntities;
13445 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013446
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013447 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013448 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013449 newDoc = xmlNewDoc(BAD_CAST "1.0");
13450 if (newDoc == NULL) {
13451 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013452 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013453 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013454 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013455 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013456 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013457 newDoc->dict = ctxt->dict;
13458 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013459 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013460 } else {
13461 ctxt->myDoc = oldctxt->myDoc;
13462 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013463 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013464 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013465 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13466 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013467 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013468 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013469 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013470 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013471 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013472 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013473 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013474 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013475 ctxt->myDoc->children = NULL;
13476 ctxt->myDoc->last = NULL;
13477 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013478 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013479 ctxt->instate = XML_PARSER_CONTENT;
13480 ctxt->depth = oldctxt->depth + 1;
13481
Daniel Veillard328f48c2002-11-15 15:24:34 +000013482 ctxt->validate = 0;
13483 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013484 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13485 /*
13486 * ID/IDREF registration will be done in xmlValidateElement below
13487 */
13488 ctxt->loadsubset |= XML_SKIP_IDS;
13489 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013490 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013491 ctxt->attsDefault = oldctxt->attsDefault;
13492 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013493
Daniel Veillard68e9e742002-11-16 15:35:11 +000013494 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013495 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013496 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013497 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013498 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013499 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013500 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013501 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013502 }
13503
13504 if (!ctxt->wellFormed) {
13505 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013506 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013507 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013508 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013509 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013510 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013511 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013512
William M. Brack7b9154b2003-09-27 19:23:50 +000013513 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013514 xmlNodePtr cur;
13515
13516 /*
13517 * Return the newly created nodeset after unlinking it from
13518 * they pseudo parent.
13519 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013520 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013521 *lst = cur;
13522 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013523#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013524 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13525 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13526 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013527 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13528 oldctxt->myDoc, cur);
13529 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013530#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013531 cur->parent = NULL;
13532 cur = cur->next;
13533 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013534 ctxt->myDoc->children->children = NULL;
13535 }
13536 if (ctxt->myDoc != NULL) {
13537 xmlFreeNode(ctxt->myDoc->children);
13538 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013539 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013540 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013541
13542 /*
13543 * Record in the parent context the number of entities replacement
13544 * done when parsing that reference.
13545 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013546 if (oldctxt != NULL)
13547 oldctxt->nbentities += ctxt->nbentities;
13548
Daniel Veillard0161e632008-08-28 15:36:32 +000013549 /*
13550 * Also record the last error if any
13551 */
13552 if (ctxt->lastError.code != XML_ERR_OK)
13553 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13554
Daniel Veillard328f48c2002-11-15 15:24:34 +000013555 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013556 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013557 ctxt->attsDefault = NULL;
13558 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013559 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013560 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013561 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013562 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013563
Daniel Veillard328f48c2002-11-15 15:24:34 +000013564 return(ret);
13565}
13566
Daniel Veillard29b17482004-08-16 00:39:03 +000013567/**
13568 * xmlParseInNodeContext:
13569 * @node: the context node
13570 * @data: the input string
13571 * @datalen: the input string length in bytes
13572 * @options: a combination of xmlParserOption
13573 * @lst: the return value for the set of parsed nodes
13574 *
13575 * Parse a well-balanced chunk of an XML document
13576 * within the context (DTD, namespaces, etc ...) of the given node.
13577 *
13578 * The allowed sequence for the data is a Well Balanced Chunk defined by
13579 * the content production in the XML grammar:
13580 *
13581 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13582 *
13583 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13584 * error code otherwise
13585 */
13586xmlParserErrors
13587xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13588 int options, xmlNodePtr *lst) {
13589#ifdef SAX2
13590 xmlParserCtxtPtr ctxt;
13591 xmlDocPtr doc = NULL;
13592 xmlNodePtr fake, cur;
13593 int nsnr = 0;
13594
13595 xmlParserErrors ret = XML_ERR_OK;
13596
13597 /*
13598 * check all input parameters, grab the document
13599 */
13600 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13601 return(XML_ERR_INTERNAL_ERROR);
13602 switch (node->type) {
13603 case XML_ELEMENT_NODE:
13604 case XML_ATTRIBUTE_NODE:
13605 case XML_TEXT_NODE:
13606 case XML_CDATA_SECTION_NODE:
13607 case XML_ENTITY_REF_NODE:
13608 case XML_PI_NODE:
13609 case XML_COMMENT_NODE:
13610 case XML_DOCUMENT_NODE:
13611 case XML_HTML_DOCUMENT_NODE:
13612 break;
13613 default:
13614 return(XML_ERR_INTERNAL_ERROR);
13615
13616 }
13617 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13618 (node->type != XML_DOCUMENT_NODE) &&
13619 (node->type != XML_HTML_DOCUMENT_NODE))
13620 node = node->parent;
13621 if (node == NULL)
13622 return(XML_ERR_INTERNAL_ERROR);
13623 if (node->type == XML_ELEMENT_NODE)
13624 doc = node->doc;
13625 else
13626 doc = (xmlDocPtr) node;
13627 if (doc == NULL)
13628 return(XML_ERR_INTERNAL_ERROR);
13629
13630 /*
13631 * allocate a context and set-up everything not related to the
13632 * node position in the tree
13633 */
13634 if (doc->type == XML_DOCUMENT_NODE)
13635 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13636#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013637 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013638 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013639 /*
13640 * When parsing in context, it makes no sense to add implied
13641 * elements like html/body/etc...
13642 */
13643 options |= HTML_PARSE_NOIMPLIED;
13644 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013645#endif
13646 else
13647 return(XML_ERR_INTERNAL_ERROR);
13648
13649 if (ctxt == NULL)
13650 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013651
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013652 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013653 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13654 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13655 * we must wait until the last moment to free the original one.
13656 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013657 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013658 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013659 xmlDictFree(ctxt->dict);
13660 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013661 } else
13662 options |= XML_PARSE_NODICT;
13663
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013664 if (doc->encoding != NULL) {
13665 xmlCharEncodingHandlerPtr hdlr;
13666
13667 if (ctxt->encoding != NULL)
13668 xmlFree((xmlChar *) ctxt->encoding);
13669 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13670
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013671 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013672 if (hdlr != NULL) {
13673 xmlSwitchToEncoding(ctxt, hdlr);
13674 } else {
13675 return(XML_ERR_UNSUPPORTED_ENCODING);
13676 }
13677 }
13678
Daniel Veillard37334572008-07-31 08:20:02 +000013679 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013680 xmlDetectSAX2(ctxt);
13681 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013682 /* parsing in context, i.e. as within existing content */
13683 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013684
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013685 fake = xmlNewComment(NULL);
13686 if (fake == NULL) {
13687 xmlFreeParserCtxt(ctxt);
13688 return(XML_ERR_NO_MEMORY);
13689 }
13690 xmlAddChild(node, fake);
13691
Daniel Veillard29b17482004-08-16 00:39:03 +000013692 if (node->type == XML_ELEMENT_NODE) {
13693 nodePush(ctxt, node);
13694 /*
13695 * initialize the SAX2 namespaces stack
13696 */
13697 cur = node;
13698 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13699 xmlNsPtr ns = cur->nsDef;
13700 const xmlChar *iprefix, *ihref;
13701
13702 while (ns != NULL) {
13703 if (ctxt->dict) {
13704 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13705 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13706 } else {
13707 iprefix = ns->prefix;
13708 ihref = ns->href;
13709 }
13710
13711 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13712 nsPush(ctxt, iprefix, ihref);
13713 nsnr++;
13714 }
13715 ns = ns->next;
13716 }
13717 cur = cur->parent;
13718 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013719 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013720
13721 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13722 /*
13723 * ID/IDREF registration will be done in xmlValidateElement below
13724 */
13725 ctxt->loadsubset |= XML_SKIP_IDS;
13726 }
13727
Daniel Veillard499cc922006-01-18 17:22:35 +000013728#ifdef LIBXML_HTML_ENABLED
13729 if (doc->type == XML_HTML_DOCUMENT_NODE)
13730 __htmlParseContent(ctxt);
13731 else
13732#endif
13733 xmlParseContent(ctxt);
13734
Daniel Veillard29b17482004-08-16 00:39:03 +000013735 nsPop(ctxt, nsnr);
13736 if ((RAW == '<') && (NXT(1) == '/')) {
13737 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13738 } else if (RAW != 0) {
13739 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13740 }
13741 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13742 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13743 ctxt->wellFormed = 0;
13744 }
13745
13746 if (!ctxt->wellFormed) {
13747 if (ctxt->errNo == 0)
13748 ret = XML_ERR_INTERNAL_ERROR;
13749 else
13750 ret = (xmlParserErrors)ctxt->errNo;
13751 } else {
13752 ret = XML_ERR_OK;
13753 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013754
Daniel Veillard29b17482004-08-16 00:39:03 +000013755 /*
13756 * Return the newly created nodeset after unlinking it from
13757 * the pseudo sibling.
13758 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013759
Daniel Veillard29b17482004-08-16 00:39:03 +000013760 cur = fake->next;
13761 fake->next = NULL;
13762 node->last = fake;
13763
13764 if (cur != NULL) {
13765 cur->prev = NULL;
13766 }
13767
13768 *lst = cur;
13769
13770 while (cur != NULL) {
13771 cur->parent = NULL;
13772 cur = cur->next;
13773 }
13774
13775 xmlUnlinkNode(fake);
13776 xmlFreeNode(fake);
13777
13778
13779 if (ret != XML_ERR_OK) {
13780 xmlFreeNodeList(*lst);
13781 *lst = NULL;
13782 }
William M. Brackc3f81342004-10-03 01:22:44 +000013783
William M. Brackb7b54de2004-10-06 16:38:01 +000013784 if (doc->dict != NULL)
13785 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013786 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013787
Daniel Veillard29b17482004-08-16 00:39:03 +000013788 return(ret);
13789#else /* !SAX2 */
13790 return(XML_ERR_INTERNAL_ERROR);
13791#endif
13792}
13793
Daniel Veillard81273902003-09-30 00:43:48 +000013794#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013795/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013796 * xmlParseBalancedChunkMemoryRecover:
13797 * @doc: the document the chunk pertains to
13798 * @sax: the SAX handler bloc (possibly NULL)
13799 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13800 * @depth: Used for loop detection, use 0
13801 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13802 * @lst: the return value for the set of parsed nodes
13803 * @recover: return nodes even if the data is broken (use 0)
13804 *
13805 *
13806 * Parse a well-balanced chunk of an XML document
13807 * called by the parser
13808 * The allowed sequence for the Well Balanced Chunk is the one defined by
13809 * the content production in the XML grammar:
13810 *
13811 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13812 *
13813 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13814 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013815 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013816 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013817 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13818 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013819 */
13820int
13821xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013822 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013823 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013824 xmlParserCtxtPtr ctxt;
13825 xmlDocPtr newDoc;
13826 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013827 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013828 int size;
13829 int ret = 0;
13830
Daniel Veillard0161e632008-08-28 15:36:32 +000013831 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013832 return(XML_ERR_ENTITY_LOOP);
13833 }
13834
13835
Daniel Veillardcda96922001-08-21 10:56:31 +000013836 if (lst != NULL)
13837 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013838 if (string == NULL)
13839 return(-1);
13840
13841 size = xmlStrlen(string);
13842
13843 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13844 if (ctxt == NULL) return(-1);
13845 ctxt->userData = ctxt;
13846 if (sax != NULL) {
13847 oldsax = ctxt->sax;
13848 ctxt->sax = sax;
13849 if (user_data != NULL)
13850 ctxt->userData = user_data;
13851 }
13852 newDoc = xmlNewDoc(BAD_CAST "1.0");
13853 if (newDoc == NULL) {
13854 xmlFreeParserCtxt(ctxt);
13855 return(-1);
13856 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013857 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013858 if ((doc != NULL) && (doc->dict != NULL)) {
13859 xmlDictFree(ctxt->dict);
13860 ctxt->dict = doc->dict;
13861 xmlDictReference(ctxt->dict);
13862 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13863 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13864 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13865 ctxt->dictNames = 1;
13866 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013867 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013868 }
Owen Taylor3473f882001-02-23 17:55:21 +000013869 if (doc != NULL) {
13870 newDoc->intSubset = doc->intSubset;
13871 newDoc->extSubset = doc->extSubset;
13872 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013873 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13874 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013875 if (sax != NULL)
13876 ctxt->sax = oldsax;
13877 xmlFreeParserCtxt(ctxt);
13878 newDoc->intSubset = NULL;
13879 newDoc->extSubset = NULL;
13880 xmlFreeDoc(newDoc);
13881 return(-1);
13882 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013883 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13884 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013885 if (doc == NULL) {
13886 ctxt->myDoc = newDoc;
13887 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013888 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013889 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013890 /* Ensure that doc has XML spec namespace */
13891 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13892 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013893 }
13894 ctxt->instate = XML_PARSER_CONTENT;
13895 ctxt->depth = depth;
13896
13897 /*
13898 * Doing validity checking on chunk doesn't make sense
13899 */
13900 ctxt->validate = 0;
13901 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013902 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013903
Daniel Veillardb39bc392002-10-26 19:29:51 +000013904 if ( doc != NULL ){
13905 content = doc->children;
13906 doc->children = NULL;
13907 xmlParseContent(ctxt);
13908 doc->children = content;
13909 }
13910 else {
13911 xmlParseContent(ctxt);
13912 }
Owen Taylor3473f882001-02-23 17:55:21 +000013913 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013914 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013915 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013916 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013917 }
13918 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013919 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013920 }
13921
13922 if (!ctxt->wellFormed) {
13923 if (ctxt->errNo == 0)
13924 ret = 1;
13925 else
13926 ret = ctxt->errNo;
13927 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013928 ret = 0;
13929 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013930
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013931 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13932 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013933
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013934 /*
13935 * Return the newly created nodeset after unlinking it from
13936 * they pseudo parent.
13937 */
13938 cur = newDoc->children->children;
13939 *lst = cur;
13940 while (cur != NULL) {
13941 xmlSetTreeDoc(cur, doc);
13942 cur->parent = NULL;
13943 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013944 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013945 newDoc->children->children = NULL;
13946 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013947
13948 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013949 ctxt->sax = oldsax;
13950 xmlFreeParserCtxt(ctxt);
13951 newDoc->intSubset = NULL;
13952 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013953 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013954 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013955
Owen Taylor3473f882001-02-23 17:55:21 +000013956 return(ret);
13957}
13958
13959/**
13960 * xmlSAXParseEntity:
13961 * @sax: the SAX handler block
13962 * @filename: the filename
13963 *
13964 * parse an XML external entity out of context and build a tree.
13965 * It use the given SAX function block to handle the parsing callback.
13966 * If sax is NULL, fallback to the default DOM tree building routines.
13967 *
13968 * [78] extParsedEnt ::= TextDecl? content
13969 *
13970 * This correspond to a "Well Balanced" chunk
13971 *
13972 * Returns the resulting document tree
13973 */
13974
13975xmlDocPtr
13976xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13977 xmlDocPtr ret;
13978 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013979
13980 ctxt = xmlCreateFileParserCtxt(filename);
13981 if (ctxt == NULL) {
13982 return(NULL);
13983 }
13984 if (sax != NULL) {
13985 if (ctxt->sax != NULL)
13986 xmlFree(ctxt->sax);
13987 ctxt->sax = sax;
13988 ctxt->userData = NULL;
13989 }
13990
Owen Taylor3473f882001-02-23 17:55:21 +000013991 xmlParseExtParsedEnt(ctxt);
13992
13993 if (ctxt->wellFormed)
13994 ret = ctxt->myDoc;
13995 else {
13996 ret = NULL;
13997 xmlFreeDoc(ctxt->myDoc);
13998 ctxt->myDoc = NULL;
13999 }
14000 if (sax != NULL)
14001 ctxt->sax = NULL;
14002 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000014003
Owen Taylor3473f882001-02-23 17:55:21 +000014004 return(ret);
14005}
14006
14007/**
14008 * xmlParseEntity:
14009 * @filename: the filename
14010 *
14011 * parse an XML external entity out of context and build a tree.
14012 *
14013 * [78] extParsedEnt ::= TextDecl? content
14014 *
14015 * This correspond to a "Well Balanced" chunk
14016 *
14017 * Returns the resulting document tree
14018 */
14019
14020xmlDocPtr
14021xmlParseEntity(const char *filename) {
14022 return(xmlSAXParseEntity(NULL, filename));
14023}
Daniel Veillard81273902003-09-30 00:43:48 +000014024#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014025
14026/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014027 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014028 * @URL: the entity URL
14029 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014030 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014031 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014032 *
14033 * Create a parser context for an external entity
14034 * Automatic support for ZLIB/Compress compressed document is provided
14035 * by default if found at compile-time.
14036 *
14037 * Returns the new parser context or NULL
14038 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014039static xmlParserCtxtPtr
14040xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14041 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014042 xmlParserCtxtPtr ctxt;
14043 xmlParserInputPtr inputStream;
14044 char *directory = NULL;
14045 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014046
Owen Taylor3473f882001-02-23 17:55:21 +000014047 ctxt = xmlNewParserCtxt();
14048 if (ctxt == NULL) {
14049 return(NULL);
14050 }
14051
Daniel Veillard48247b42009-07-10 16:12:46 +020014052 if (pctx != NULL) {
14053 ctxt->options = pctx->options;
14054 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014055 }
14056
Owen Taylor3473f882001-02-23 17:55:21 +000014057 uri = xmlBuildURI(URL, base);
14058
14059 if (uri == NULL) {
14060 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14061 if (inputStream == NULL) {
14062 xmlFreeParserCtxt(ctxt);
14063 return(NULL);
14064 }
14065
14066 inputPush(ctxt, inputStream);
14067
14068 if ((ctxt->directory == NULL) && (directory == NULL))
14069 directory = xmlParserGetDirectory((char *)URL);
14070 if ((ctxt->directory == NULL) && (directory != NULL))
14071 ctxt->directory = directory;
14072 } else {
14073 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14074 if (inputStream == NULL) {
14075 xmlFree(uri);
14076 xmlFreeParserCtxt(ctxt);
14077 return(NULL);
14078 }
14079
14080 inputPush(ctxt, inputStream);
14081
14082 if ((ctxt->directory == NULL) && (directory == NULL))
14083 directory = xmlParserGetDirectory((char *)uri);
14084 if ((ctxt->directory == NULL) && (directory != NULL))
14085 ctxt->directory = directory;
14086 xmlFree(uri);
14087 }
Owen Taylor3473f882001-02-23 17:55:21 +000014088 return(ctxt);
14089}
14090
Rob Richards9c0aa472009-03-26 18:10:19 +000014091/**
14092 * xmlCreateEntityParserCtxt:
14093 * @URL: the entity URL
14094 * @ID: the entity PUBLIC ID
14095 * @base: a possible base for the target URI
14096 *
14097 * Create a parser context for an external entity
14098 * Automatic support for ZLIB/Compress compressed document is provided
14099 * by default if found at compile-time.
14100 *
14101 * Returns the new parser context or NULL
14102 */
14103xmlParserCtxtPtr
14104xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14105 const xmlChar *base) {
14106 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14107
14108}
14109
Owen Taylor3473f882001-02-23 17:55:21 +000014110/************************************************************************
14111 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014112 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014113 * *
14114 ************************************************************************/
14115
14116/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014117 * xmlCreateURLParserCtxt:
14118 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014119 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014120 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014121 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014122 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014123 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014124 *
14125 * Returns the new parser context or NULL
14126 */
14127xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014128xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014129{
14130 xmlParserCtxtPtr ctxt;
14131 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014132 char *directory = NULL;
14133
Owen Taylor3473f882001-02-23 17:55:21 +000014134 ctxt = xmlNewParserCtxt();
14135 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014136 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014137 return(NULL);
14138 }
14139
Daniel Veillarddf292f72005-01-16 19:00:15 +000014140 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014141 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014142 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014143
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014144 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014145 if (inputStream == NULL) {
14146 xmlFreeParserCtxt(ctxt);
14147 return(NULL);
14148 }
14149
Owen Taylor3473f882001-02-23 17:55:21 +000014150 inputPush(ctxt, inputStream);
14151 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014152 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014153 if ((ctxt->directory == NULL) && (directory != NULL))
14154 ctxt->directory = directory;
14155
14156 return(ctxt);
14157}
14158
Daniel Veillard61b93382003-11-03 14:28:31 +000014159/**
14160 * xmlCreateFileParserCtxt:
14161 * @filename: the filename
14162 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014163 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014164 * Automatic support for ZLIB/Compress compressed document is provided
14165 * by default if found at compile-time.
14166 *
14167 * Returns the new parser context or NULL
14168 */
14169xmlParserCtxtPtr
14170xmlCreateFileParserCtxt(const char *filename)
14171{
14172 return(xmlCreateURLParserCtxt(filename, 0));
14173}
14174
Daniel Veillard81273902003-09-30 00:43:48 +000014175#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014176/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014177 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014178 * @sax: the SAX handler block
14179 * @filename: the filename
14180 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14181 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014182 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014183 *
14184 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14185 * compressed document is provided by default if found at compile-time.
14186 * It use the given SAX function block to handle the parsing callback.
14187 * If sax is NULL, fallback to the default DOM tree building routines.
14188 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014189 * User data (void *) is stored within the parser context in the
14190 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014191 *
Owen Taylor3473f882001-02-23 17:55:21 +000014192 * Returns the resulting document tree
14193 */
14194
14195xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014196xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14197 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014198 xmlDocPtr ret;
14199 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014200
Daniel Veillard635ef722001-10-29 11:48:19 +000014201 xmlInitParser();
14202
Owen Taylor3473f882001-02-23 17:55:21 +000014203 ctxt = xmlCreateFileParserCtxt(filename);
14204 if (ctxt == NULL) {
14205 return(NULL);
14206 }
14207 if (sax != NULL) {
14208 if (ctxt->sax != NULL)
14209 xmlFree(ctxt->sax);
14210 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014211 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014212 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014213 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014214 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014215 }
Owen Taylor3473f882001-02-23 17:55:21 +000014216
Daniel Veillard37d2d162008-03-14 10:54:00 +000014217 if (ctxt->directory == NULL)
14218 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014219
Daniel Veillarddad3f682002-11-17 16:47:27 +000014220 ctxt->recovery = recovery;
14221
Owen Taylor3473f882001-02-23 17:55:21 +000014222 xmlParseDocument(ctxt);
14223
William M. Brackc07329e2003-09-08 01:57:30 +000014224 if ((ctxt->wellFormed) || recovery) {
14225 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014226 if (ret != NULL) {
14227 if (ctxt->input->buf->compressed > 0)
14228 ret->compression = 9;
14229 else
14230 ret->compression = ctxt->input->buf->compressed;
14231 }
William M. Brackc07329e2003-09-08 01:57:30 +000014232 }
Owen Taylor3473f882001-02-23 17:55:21 +000014233 else {
14234 ret = NULL;
14235 xmlFreeDoc(ctxt->myDoc);
14236 ctxt->myDoc = NULL;
14237 }
14238 if (sax != NULL)
14239 ctxt->sax = NULL;
14240 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014241
Owen Taylor3473f882001-02-23 17:55:21 +000014242 return(ret);
14243}
14244
14245/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014246 * xmlSAXParseFile:
14247 * @sax: the SAX handler block
14248 * @filename: the filename
14249 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14250 * documents
14251 *
14252 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14253 * compressed document is provided by default if found at compile-time.
14254 * It use the given SAX function block to handle the parsing callback.
14255 * If sax is NULL, fallback to the default DOM tree building routines.
14256 *
14257 * Returns the resulting document tree
14258 */
14259
14260xmlDocPtr
14261xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14262 int recovery) {
14263 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14264}
14265
14266/**
Owen Taylor3473f882001-02-23 17:55:21 +000014267 * xmlRecoverDoc:
14268 * @cur: a pointer to an array of xmlChar
14269 *
14270 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014271 * In the case the document is not Well Formed, a attempt to build a
14272 * tree is tried anyway
14273 *
14274 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014275 */
14276
14277xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014278xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014279 return(xmlSAXParseDoc(NULL, cur, 1));
14280}
14281
14282/**
14283 * xmlParseFile:
14284 * @filename: the filename
14285 *
14286 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14287 * compressed document is provided by default if found at compile-time.
14288 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014289 * Returns the resulting document tree if the file was wellformed,
14290 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014291 */
14292
14293xmlDocPtr
14294xmlParseFile(const char *filename) {
14295 return(xmlSAXParseFile(NULL, filename, 0));
14296}
14297
14298/**
14299 * xmlRecoverFile:
14300 * @filename: the filename
14301 *
14302 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14303 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014304 * In the case the document is not Well Formed, it attempts to build
14305 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014306 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014307 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014308 */
14309
14310xmlDocPtr
14311xmlRecoverFile(const char *filename) {
14312 return(xmlSAXParseFile(NULL, filename, 1));
14313}
14314
14315
14316/**
14317 * xmlSetupParserForBuffer:
14318 * @ctxt: an XML parser context
14319 * @buffer: a xmlChar * buffer
14320 * @filename: a file name
14321 *
14322 * Setup the parser context to parse a new buffer; Clears any prior
14323 * contents from the parser context. The buffer parameter must not be
14324 * NULL, but the filename parameter can be
14325 */
14326void
14327xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14328 const char* filename)
14329{
14330 xmlParserInputPtr input;
14331
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014332 if ((ctxt == NULL) || (buffer == NULL))
14333 return;
14334
Owen Taylor3473f882001-02-23 17:55:21 +000014335 input = xmlNewInputStream(ctxt);
14336 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014337 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014338 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014339 return;
14340 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014341
Owen Taylor3473f882001-02-23 17:55:21 +000014342 xmlClearParserCtxt(ctxt);
14343 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014344 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014345 input->base = buffer;
14346 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014347 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014348 inputPush(ctxt, input);
14349}
14350
14351/**
14352 * xmlSAXUserParseFile:
14353 * @sax: a SAX handler
14354 * @user_data: The user data returned on SAX callbacks
14355 * @filename: a file name
14356 *
14357 * parse an XML file and call the given SAX handler routines.
14358 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014359 *
Owen Taylor3473f882001-02-23 17:55:21 +000014360 * Returns 0 in case of success or a error number otherwise
14361 */
14362int
14363xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14364 const char *filename) {
14365 int ret = 0;
14366 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014367
Owen Taylor3473f882001-02-23 17:55:21 +000014368 ctxt = xmlCreateFileParserCtxt(filename);
14369 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014370 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014371 xmlFree(ctxt->sax);
14372 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014373 xmlDetectSAX2(ctxt);
14374
Owen Taylor3473f882001-02-23 17:55:21 +000014375 if (user_data != NULL)
14376 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014377
Owen Taylor3473f882001-02-23 17:55:21 +000014378 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014379
Owen Taylor3473f882001-02-23 17:55:21 +000014380 if (ctxt->wellFormed)
14381 ret = 0;
14382 else {
14383 if (ctxt->errNo != 0)
14384 ret = ctxt->errNo;
14385 else
14386 ret = -1;
14387 }
14388 if (sax != NULL)
14389 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014390 if (ctxt->myDoc != NULL) {
14391 xmlFreeDoc(ctxt->myDoc);
14392 ctxt->myDoc = NULL;
14393 }
Owen Taylor3473f882001-02-23 17:55:21 +000014394 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014395
Owen Taylor3473f882001-02-23 17:55:21 +000014396 return ret;
14397}
Daniel Veillard81273902003-09-30 00:43:48 +000014398#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014399
14400/************************************************************************
14401 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014402 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014403 * *
14404 ************************************************************************/
14405
14406/**
14407 * xmlCreateMemoryParserCtxt:
14408 * @buffer: a pointer to a char array
14409 * @size: the size of the array
14410 *
14411 * Create a parser context for an XML in-memory document.
14412 *
14413 * Returns the new parser context or NULL
14414 */
14415xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014416xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014417 xmlParserCtxtPtr ctxt;
14418 xmlParserInputPtr input;
14419 xmlParserInputBufferPtr buf;
14420
14421 if (buffer == NULL)
14422 return(NULL);
14423 if (size <= 0)
14424 return(NULL);
14425
14426 ctxt = xmlNewParserCtxt();
14427 if (ctxt == NULL)
14428 return(NULL);
14429
Daniel Veillard53350552003-09-18 13:35:51 +000014430 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014431 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014432 if (buf == NULL) {
14433 xmlFreeParserCtxt(ctxt);
14434 return(NULL);
14435 }
Owen Taylor3473f882001-02-23 17:55:21 +000014436
14437 input = xmlNewInputStream(ctxt);
14438 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014439 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014440 xmlFreeParserCtxt(ctxt);
14441 return(NULL);
14442 }
14443
14444 input->filename = NULL;
14445 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014446 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014447
14448 inputPush(ctxt, input);
14449 return(ctxt);
14450}
14451
Daniel Veillard81273902003-09-30 00:43:48 +000014452#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014453/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014454 * xmlSAXParseMemoryWithData:
14455 * @sax: the SAX handler block
14456 * @buffer: an pointer to a char array
14457 * @size: the size of the array
14458 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14459 * documents
14460 * @data: the userdata
14461 *
14462 * parse an XML in-memory block and use the given SAX function block
14463 * to handle the parsing callback. If sax is NULL, fallback to the default
14464 * DOM tree building routines.
14465 *
14466 * User data (void *) is stored within the parser context in the
14467 * context's _private member, so it is available nearly everywhere in libxml
14468 *
14469 * Returns the resulting document tree
14470 */
14471
14472xmlDocPtr
14473xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14474 int size, int recovery, void *data) {
14475 xmlDocPtr ret;
14476 xmlParserCtxtPtr ctxt;
14477
Daniel Veillardab2a7632009-07-09 08:45:03 +020014478 xmlInitParser();
14479
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014480 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14481 if (ctxt == NULL) return(NULL);
14482 if (sax != NULL) {
14483 if (ctxt->sax != NULL)
14484 xmlFree(ctxt->sax);
14485 ctxt->sax = sax;
14486 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014487 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014488 if (data!=NULL) {
14489 ctxt->_private=data;
14490 }
14491
Daniel Veillardadba5f12003-04-04 16:09:01 +000014492 ctxt->recovery = recovery;
14493
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014494 xmlParseDocument(ctxt);
14495
14496 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14497 else {
14498 ret = NULL;
14499 xmlFreeDoc(ctxt->myDoc);
14500 ctxt->myDoc = NULL;
14501 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014502 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014503 ctxt->sax = NULL;
14504 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014505
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014506 return(ret);
14507}
14508
14509/**
Owen Taylor3473f882001-02-23 17:55:21 +000014510 * xmlSAXParseMemory:
14511 * @sax: the SAX handler block
14512 * @buffer: an pointer to a char array
14513 * @size: the size of the array
14514 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14515 * documents
14516 *
14517 * parse an XML in-memory block and use the given SAX function block
14518 * to handle the parsing callback. If sax is NULL, fallback to the default
14519 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014520 *
Owen Taylor3473f882001-02-23 17:55:21 +000014521 * Returns the resulting document tree
14522 */
14523xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014524xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14525 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014526 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014527}
14528
14529/**
14530 * xmlParseMemory:
14531 * @buffer: an pointer to a char array
14532 * @size: the size of the array
14533 *
14534 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014535 *
Owen Taylor3473f882001-02-23 17:55:21 +000014536 * Returns the resulting document tree
14537 */
14538
Daniel Veillard50822cb2001-07-26 20:05:51 +000014539xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014540 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14541}
14542
14543/**
14544 * xmlRecoverMemory:
14545 * @buffer: an pointer to a char array
14546 * @size: the size of the array
14547 *
14548 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014549 * In the case the document is not Well Formed, an attempt to
14550 * build a tree is tried anyway
14551 *
14552 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014553 */
14554
Daniel Veillard50822cb2001-07-26 20:05:51 +000014555xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014556 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14557}
14558
14559/**
14560 * xmlSAXUserParseMemory:
14561 * @sax: a SAX handler
14562 * @user_data: The user data returned on SAX callbacks
14563 * @buffer: an in-memory XML document input
14564 * @size: the length of the XML document in bytes
14565 *
14566 * A better SAX parsing routine.
14567 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014568 *
Owen Taylor3473f882001-02-23 17:55:21 +000014569 * Returns 0 in case of success or a error number otherwise
14570 */
14571int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014572 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014573 int ret = 0;
14574 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014575
14576 xmlInitParser();
14577
Owen Taylor3473f882001-02-23 17:55:21 +000014578 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14579 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014580 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14581 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014582 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014583 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014584
Daniel Veillard30211a02001-04-26 09:33:18 +000014585 if (user_data != NULL)
14586 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014587
Owen Taylor3473f882001-02-23 17:55:21 +000014588 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014589
Owen Taylor3473f882001-02-23 17:55:21 +000014590 if (ctxt->wellFormed)
14591 ret = 0;
14592 else {
14593 if (ctxt->errNo != 0)
14594 ret = ctxt->errNo;
14595 else
14596 ret = -1;
14597 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014598 if (sax != NULL)
14599 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014600 if (ctxt->myDoc != NULL) {
14601 xmlFreeDoc(ctxt->myDoc);
14602 ctxt->myDoc = NULL;
14603 }
Owen Taylor3473f882001-02-23 17:55:21 +000014604 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014605
Owen Taylor3473f882001-02-23 17:55:21 +000014606 return ret;
14607}
Daniel Veillard81273902003-09-30 00:43:48 +000014608#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014609
14610/**
14611 * xmlCreateDocParserCtxt:
14612 * @cur: a pointer to an array of xmlChar
14613 *
14614 * Creates a parser context for an XML in-memory document.
14615 *
14616 * Returns the new parser context or NULL
14617 */
14618xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014619xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014620 int len;
14621
14622 if (cur == NULL)
14623 return(NULL);
14624 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014625 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014626}
14627
Daniel Veillard81273902003-09-30 00:43:48 +000014628#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014629/**
14630 * xmlSAXParseDoc:
14631 * @sax: the SAX handler block
14632 * @cur: a pointer to an array of xmlChar
14633 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14634 * documents
14635 *
14636 * parse an XML in-memory document and build a tree.
14637 * It use the given SAX function block to handle the parsing callback.
14638 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014639 *
Owen Taylor3473f882001-02-23 17:55:21 +000014640 * Returns the resulting document tree
14641 */
14642
14643xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014644xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014645 xmlDocPtr ret;
14646 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014647 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014648
Daniel Veillard38936062004-11-04 17:45:11 +000014649 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014650
14651
14652 ctxt = xmlCreateDocParserCtxt(cur);
14653 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014654 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014655 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014656 ctxt->sax = sax;
14657 ctxt->userData = NULL;
14658 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014659 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014660
14661 xmlParseDocument(ctxt);
14662 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14663 else {
14664 ret = NULL;
14665 xmlFreeDoc(ctxt->myDoc);
14666 ctxt->myDoc = NULL;
14667 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014668 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014669 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014670 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014671
Owen Taylor3473f882001-02-23 17:55:21 +000014672 return(ret);
14673}
14674
14675/**
14676 * xmlParseDoc:
14677 * @cur: a pointer to an array of xmlChar
14678 *
14679 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014680 *
Owen Taylor3473f882001-02-23 17:55:21 +000014681 * Returns the resulting document tree
14682 */
14683
14684xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014685xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014686 return(xmlSAXParseDoc(NULL, cur, 0));
14687}
Daniel Veillard81273902003-09-30 00:43:48 +000014688#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014689
Daniel Veillard81273902003-09-30 00:43:48 +000014690#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014691/************************************************************************
14692 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014693 * Specific function to keep track of entities references *
14694 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014695 * *
14696 ************************************************************************/
14697
14698static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14699
14700/**
14701 * xmlAddEntityReference:
14702 * @ent : A valid entity
14703 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014704 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014705 *
14706 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14707 */
14708static void
14709xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14710 xmlNodePtr lastNode)
14711{
14712 if (xmlEntityRefFunc != NULL) {
14713 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14714 }
14715}
14716
14717
14718/**
14719 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014720 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014721 *
14722 * Set the function to call call back when a xml reference has been made
14723 */
14724void
14725xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14726{
14727 xmlEntityRefFunc = func;
14728}
Daniel Veillard81273902003-09-30 00:43:48 +000014729#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014730
14731/************************************************************************
14732 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014733 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014734 * *
14735 ************************************************************************/
14736
14737#ifdef LIBXML_XPATH_ENABLED
14738#include <libxml/xpath.h>
14739#endif
14740
Daniel Veillardffa3c742005-07-21 13:24:09 +000014741extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014742static int xmlParserInitialized = 0;
14743
14744/**
14745 * xmlInitParser:
14746 *
14747 * Initialization function for the XML parser.
14748 * This is not reentrant. Call once before processing in case of
14749 * use in multithreaded programs.
14750 */
14751
14752void
14753xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014754 if (xmlParserInitialized != 0)
14755 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014756
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014757#ifdef LIBXML_THREAD_ENABLED
14758 __xmlGlobalInitMutexLock();
14759 if (xmlParserInitialized == 0) {
14760#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014761 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014762 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014763 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14764 (xmlGenericError == NULL))
14765 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014766 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014767 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014768 xmlInitCharEncodingHandlers();
14769 xmlDefaultSAXHandlerInit();
14770 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014771#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014772 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014773#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014774#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014775 htmlInitAutoClose();
14776 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014777#endif
14778#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014779 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014780#endif
Daniel Veillard054c7162014-01-26 15:02:25 +010014781#ifdef LIBXML_CATALOG_ENABLED
14782 xmlInitializeCatalog();
14783#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014784 xmlParserInitialized = 1;
14785#ifdef LIBXML_THREAD_ENABLED
14786 }
14787 __xmlGlobalInitMutexUnlock();
14788#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014789}
14790
14791/**
14792 * xmlCleanupParser:
14793 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014794 * This function name is somewhat misleading. It does not clean up
14795 * parser state, it cleans up memory allocated by the library itself.
14796 * It is a cleanup function for the XML library. It tries to reclaim all
14797 * related global memory allocated for the library processing.
14798 * It doesn't deallocate any document related memory. One should
14799 * call xmlCleanupParser() only when the process has finished using
14800 * the library and all XML/HTML documents built with it.
14801 * See also xmlInitParser() which has the opposite function of preparing
14802 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014803 *
14804 * WARNING: if your application is multithreaded or has plugin support
14805 * calling this may crash the application if another thread or
14806 * a plugin is still using libxml2. It's sometimes very hard to
14807 * guess if libxml2 is in use in the application, some libraries
14808 * or plugins may use it without notice. In case of doubt abstain
14809 * from calling this function or do it just before calling exit()
14810 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014811 */
14812
14813void
14814xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014815 if (!xmlParserInitialized)
14816 return;
14817
Owen Taylor3473f882001-02-23 17:55:21 +000014818 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014819#ifdef LIBXML_CATALOG_ENABLED
14820 xmlCatalogCleanup();
14821#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014822 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014823 xmlCleanupInputCallbacks();
14824#ifdef LIBXML_OUTPUT_ENABLED
14825 xmlCleanupOutputCallbacks();
14826#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014827#ifdef LIBXML_SCHEMAS_ENABLED
14828 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014829 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014830#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014831 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014832 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014833 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014834 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014835 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014836}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014837
14838/************************************************************************
14839 * *
14840 * New set (2.6.0) of simpler and more flexible APIs *
14841 * *
14842 ************************************************************************/
14843
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible at the point of
 * expansion. @str is evaluated more than once, so it must not have
 * side effects.
 *
 * The expansion is wrapped in do { } while (0) so that it forms exactly
 * one statement: "DICT_FREE(x);" can then be used safely as the body of
 * an unbraced if/else, and no stray empty statement is left behind.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14855
14856/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014857 * xmlCtxtReset:
14858 * @ctxt: an XML parser context
14859 *
14860 * Reset a parser context
14861 */
14862void
14863xmlCtxtReset(xmlParserCtxtPtr ctxt)
14864{
14865 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014866 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014867
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014868 if (ctxt == NULL)
14869 return;
14870
14871 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014872
14873 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14874 xmlFreeInputStream(input);
14875 }
14876 ctxt->inputNr = 0;
14877 ctxt->input = NULL;
14878
14879 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014880 if (ctxt->spaceTab != NULL) {
14881 ctxt->spaceTab[0] = -1;
14882 ctxt->space = &ctxt->spaceTab[0];
14883 } else {
14884 ctxt->space = NULL;
14885 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014886
14887
14888 ctxt->nodeNr = 0;
14889 ctxt->node = NULL;
14890
14891 ctxt->nameNr = 0;
14892 ctxt->name = NULL;
14893
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014894 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014895 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014896 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014897 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014898 DICT_FREE(ctxt->directory);
14899 ctxt->directory = NULL;
14900 DICT_FREE(ctxt->extSubURI);
14901 ctxt->extSubURI = NULL;
14902 DICT_FREE(ctxt->extSubSystem);
14903 ctxt->extSubSystem = NULL;
14904 if (ctxt->myDoc != NULL)
14905 xmlFreeDoc(ctxt->myDoc);
14906 ctxt->myDoc = NULL;
14907
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014908 ctxt->standalone = -1;
14909 ctxt->hasExternalSubset = 0;
14910 ctxt->hasPErefs = 0;
14911 ctxt->html = 0;
14912 ctxt->external = 0;
14913 ctxt->instate = XML_PARSER_START;
14914 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014915
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014916 ctxt->wellFormed = 1;
14917 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014918 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014919 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014920#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014921 ctxt->vctxt.userData = ctxt;
14922 ctxt->vctxt.error = xmlParserValidityError;
14923 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014924#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014925 ctxt->record_info = 0;
14926 ctxt->nbChars = 0;
14927 ctxt->checkIndex = 0;
14928 ctxt->inSubset = 0;
14929 ctxt->errNo = XML_ERR_OK;
14930 ctxt->depth = 0;
14931 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14932 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014933 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014934 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014935 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014936 xmlInitNodeInfoSeq(&ctxt->node_seq);
14937
14938 if (ctxt->attsDefault != NULL) {
14939 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14940 ctxt->attsDefault = NULL;
14941 }
14942 if (ctxt->attsSpecial != NULL) {
14943 xmlHashFree(ctxt->attsSpecial, NULL);
14944 ctxt->attsSpecial = NULL;
14945 }
14946
Daniel Veillard4432df22003-09-28 18:58:27 +000014947#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014948 if (ctxt->catalogs != NULL)
14949 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014950#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014951 if (ctxt->lastError.code != XML_ERR_OK)
14952 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014953}
14954
14955/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014956 * xmlCtxtResetPush:
14957 * @ctxt: an XML parser context
14958 * @chunk: a pointer to an array of chars
14959 * @size: number of chars in the array
14960 * @filename: an optional file name or URI
14961 * @encoding: the document encoding, or NULL
14962 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014963 * Reset a push parser context
14964 *
14965 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014966 */
14967int
14968xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14969 int size, const char *filename, const char *encoding)
14970{
14971 xmlParserInputPtr inputStream;
14972 xmlParserInputBufferPtr buf;
14973 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14974
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014975 if (ctxt == NULL)
14976 return(1);
14977
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014978 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14979 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14980
14981 buf = xmlAllocParserInputBuffer(enc);
14982 if (buf == NULL)
14983 return(1);
14984
14985 if (ctxt == NULL) {
14986 xmlFreeParserInputBuffer(buf);
14987 return(1);
14988 }
14989
14990 xmlCtxtReset(ctxt);
14991
14992 if (ctxt->pushTab == NULL) {
14993 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14994 sizeof(xmlChar *));
14995 if (ctxt->pushTab == NULL) {
14996 xmlErrMemory(ctxt, NULL);
14997 xmlFreeParserInputBuffer(buf);
14998 return(1);
14999 }
15000 }
15001
15002 if (filename == NULL) {
15003 ctxt->directory = NULL;
15004 } else {
15005 ctxt->directory = xmlParserGetDirectory(filename);
15006 }
15007
15008 inputStream = xmlNewInputStream(ctxt);
15009 if (inputStream == NULL) {
15010 xmlFreeParserInputBuffer(buf);
15011 return(1);
15012 }
15013
15014 if (filename == NULL)
15015 inputStream->filename = NULL;
15016 else
15017 inputStream->filename = (char *)
15018 xmlCanonicPath((const xmlChar *) filename);
15019 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080015020 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015021
15022 inputPush(ctxt, inputStream);
15023
15024 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15025 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015026 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15027 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015028
15029 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15030
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015031 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015032#ifdef DEBUG_PUSH
15033 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15034#endif
15035 }
15036
15037 if (encoding != NULL) {
15038 xmlCharEncodingHandlerPtr hdlr;
15039
Daniel Veillard37334572008-07-31 08:20:02 +000015040 if (ctxt->encoding != NULL)
15041 xmlFree((xmlChar *) ctxt->encoding);
15042 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15043
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015044 hdlr = xmlFindCharEncodingHandler(encoding);
15045 if (hdlr != NULL) {
15046 xmlSwitchToEncoding(ctxt, hdlr);
15047 } else {
15048 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15049 "Unsupported encoding %s\n", BAD_CAST encoding);
15050 }
15051 } else if (enc != XML_CHAR_ENCODING_NONE) {
15052 xmlSwitchEncoding(ctxt, enc);
15053 }
15054
15055 return(0);
15056}
15057
Daniel Veillard37334572008-07-31 08:20:02 +000015058
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015059/**
Daniel Veillard37334572008-07-31 08:20:02 +000015060 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015061 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015062 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015063 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015064 *
15065 * Applies the options to the parser context
15066 *
15067 * Returns 0 in case of success, the set of unknown or unimplemented options
15068 * in case of error.
15069 */
Daniel Veillard37334572008-07-31 08:20:02 +000015070static int
15071xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015072{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015073 if (ctxt == NULL)
15074 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015075 if (encoding != NULL) {
15076 if (ctxt->encoding != NULL)
15077 xmlFree((xmlChar *) ctxt->encoding);
15078 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15079 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015080 if (options & XML_PARSE_RECOVER) {
15081 ctxt->recovery = 1;
15082 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015083 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015084 } else
15085 ctxt->recovery = 0;
15086 if (options & XML_PARSE_DTDLOAD) {
15087 ctxt->loadsubset = XML_DETECT_IDS;
15088 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015089 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015090 } else
15091 ctxt->loadsubset = 0;
15092 if (options & XML_PARSE_DTDATTR) {
15093 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15094 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015095 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015096 }
15097 if (options & XML_PARSE_NOENT) {
15098 ctxt->replaceEntities = 1;
15099 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15100 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015101 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015102 } else
15103 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015104 if (options & XML_PARSE_PEDANTIC) {
15105 ctxt->pedantic = 1;
15106 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015107 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015108 } else
15109 ctxt->pedantic = 0;
15110 if (options & XML_PARSE_NOBLANKS) {
15111 ctxt->keepBlanks = 0;
15112 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15113 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015114 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015115 } else
15116 ctxt->keepBlanks = 1;
15117 if (options & XML_PARSE_DTDVALID) {
15118 ctxt->validate = 1;
15119 if (options & XML_PARSE_NOWARNING)
15120 ctxt->vctxt.warning = NULL;
15121 if (options & XML_PARSE_NOERROR)
15122 ctxt->vctxt.error = NULL;
15123 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015124 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015125 } else
15126 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015127 if (options & XML_PARSE_NOWARNING) {
15128 ctxt->sax->warning = NULL;
15129 options -= XML_PARSE_NOWARNING;
15130 }
15131 if (options & XML_PARSE_NOERROR) {
15132 ctxt->sax->error = NULL;
15133 ctxt->sax->fatalError = NULL;
15134 options -= XML_PARSE_NOERROR;
15135 }
Daniel Veillard81273902003-09-30 00:43:48 +000015136#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015137 if (options & XML_PARSE_SAX1) {
15138 ctxt->sax->startElement = xmlSAX2StartElement;
15139 ctxt->sax->endElement = xmlSAX2EndElement;
15140 ctxt->sax->startElementNs = NULL;
15141 ctxt->sax->endElementNs = NULL;
15142 ctxt->sax->initialized = 1;
15143 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015144 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015145 }
Daniel Veillard81273902003-09-30 00:43:48 +000015146#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015147 if (options & XML_PARSE_NODICT) {
15148 ctxt->dictNames = 0;
15149 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015150 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015151 } else {
15152 ctxt->dictNames = 1;
15153 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015154 if (options & XML_PARSE_NOCDATA) {
15155 ctxt->sax->cdataBlock = NULL;
15156 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015157 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015158 }
15159 if (options & XML_PARSE_NSCLEAN) {
15160 ctxt->options |= XML_PARSE_NSCLEAN;
15161 options -= XML_PARSE_NSCLEAN;
15162 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015163 if (options & XML_PARSE_NONET) {
15164 ctxt->options |= XML_PARSE_NONET;
15165 options -= XML_PARSE_NONET;
15166 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015167 if (options & XML_PARSE_COMPACT) {
15168 ctxt->options |= XML_PARSE_COMPACT;
15169 options -= XML_PARSE_COMPACT;
15170 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015171 if (options & XML_PARSE_OLD10) {
15172 ctxt->options |= XML_PARSE_OLD10;
15173 options -= XML_PARSE_OLD10;
15174 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015175 if (options & XML_PARSE_NOBASEFIX) {
15176 ctxt->options |= XML_PARSE_NOBASEFIX;
15177 options -= XML_PARSE_NOBASEFIX;
15178 }
15179 if (options & XML_PARSE_HUGE) {
15180 ctxt->options |= XML_PARSE_HUGE;
15181 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015182 if (ctxt->dict != NULL)
15183 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015184 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015185 if (options & XML_PARSE_OLDSAX) {
15186 ctxt->options |= XML_PARSE_OLDSAX;
15187 options -= XML_PARSE_OLDSAX;
15188 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015189 if (options & XML_PARSE_IGNORE_ENC) {
15190 ctxt->options |= XML_PARSE_IGNORE_ENC;
15191 options -= XML_PARSE_IGNORE_ENC;
15192 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015193 if (options & XML_PARSE_BIG_LINES) {
15194 ctxt->options |= XML_PARSE_BIG_LINES;
15195 options -= XML_PARSE_BIG_LINES;
15196 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015197 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015198 return (options);
15199}
15200
15201/**
Daniel Veillard37334572008-07-31 08:20:02 +000015202 * xmlCtxtUseOptions:
15203 * @ctxt: an XML parser context
15204 * @options: a combination of xmlParserOption
15205 *
15206 * Applies the options to the parser context
15207 *
15208 * Returns 0 in case of success, the set of unknown or unimplemented options
15209 * in case of error.
15210 */
15211int
15212xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15213{
15214 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15215}
15216
15217/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015218 * xmlDoRead:
15219 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015220 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015221 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015222 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015223 * @reuse: keep the context for reuse
15224 *
15225 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015226 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015227 * Returns the resulting document tree or NULL
15228 */
15229static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015230xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15231 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015232{
15233 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015234
15235 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015236 if (encoding != NULL) {
15237 xmlCharEncodingHandlerPtr hdlr;
15238
15239 hdlr = xmlFindCharEncodingHandler(encoding);
15240 if (hdlr != NULL)
15241 xmlSwitchToEncoding(ctxt, hdlr);
15242 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015243 if ((URL != NULL) && (ctxt->input != NULL) &&
15244 (ctxt->input->filename == NULL))
15245 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015246 xmlParseDocument(ctxt);
15247 if ((ctxt->wellFormed) || ctxt->recovery)
15248 ret = ctxt->myDoc;
15249 else {
15250 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015251 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015252 xmlFreeDoc(ctxt->myDoc);
15253 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015254 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015255 ctxt->myDoc = NULL;
15256 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015257 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015258 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015259
15260 return (ret);
15261}
15262
15263/**
15264 * xmlReadDoc:
15265 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015266 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015267 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015268 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015269 *
15270 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015271 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015272 * Returns the resulting document tree
15273 */
15274xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015275xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015276{
15277 xmlParserCtxtPtr ctxt;
15278
15279 if (cur == NULL)
15280 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015281 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015282
15283 ctxt = xmlCreateDocParserCtxt(cur);
15284 if (ctxt == NULL)
15285 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015286 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015287}
15288
15289/**
15290 * xmlReadFile:
15291 * @filename: a file or URL
15292 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015293 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015294 *
15295 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015296 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015297 * Returns the resulting document tree
15298 */
15299xmlDocPtr
15300xmlReadFile(const char *filename, const char *encoding, int options)
15301{
15302 xmlParserCtxtPtr ctxt;
15303
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015304 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015305 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015306 if (ctxt == NULL)
15307 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015308 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015309}
15310
15311/**
15312 * xmlReadMemory:
15313 * @buffer: a pointer to a char array
15314 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015315 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015316 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015317 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015318 *
15319 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015320 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015321 * Returns the resulting document tree
15322 */
15323xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015324xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015325{
15326 xmlParserCtxtPtr ctxt;
15327
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015328 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015329 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15330 if (ctxt == NULL)
15331 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015332 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015333}
15334
15335/**
15336 * xmlReadFd:
15337 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015338 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015339 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015340 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015341 *
15342 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015343 * NOTE that the file descriptor will not be closed when the
15344 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015345 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015346 * Returns the resulting document tree
15347 */
15348xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015349xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015350{
15351 xmlParserCtxtPtr ctxt;
15352 xmlParserInputBufferPtr input;
15353 xmlParserInputPtr stream;
15354
15355 if (fd < 0)
15356 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015357 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015358
15359 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15360 if (input == NULL)
15361 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015362 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015363 ctxt = xmlNewParserCtxt();
15364 if (ctxt == NULL) {
15365 xmlFreeParserInputBuffer(input);
15366 return (NULL);
15367 }
15368 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15369 if (stream == NULL) {
15370 xmlFreeParserInputBuffer(input);
15371 xmlFreeParserCtxt(ctxt);
15372 return (NULL);
15373 }
15374 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015375 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015376}
15377
15378/**
15379 * xmlReadIO:
15380 * @ioread: an I/O read function
15381 * @ioclose: an I/O close function
15382 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015383 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015384 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015385 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015386 *
15387 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015388 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015389 * Returns the resulting document tree
15390 */
15391xmlDocPtr
15392xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015393 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015394{
15395 xmlParserCtxtPtr ctxt;
15396 xmlParserInputBufferPtr input;
15397 xmlParserInputPtr stream;
15398
15399 if (ioread == NULL)
15400 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015401 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015402
15403 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15404 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015405 if (input == NULL) {
15406 if (ioclose != NULL)
15407 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015409 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015410 ctxt = xmlNewParserCtxt();
15411 if (ctxt == NULL) {
15412 xmlFreeParserInputBuffer(input);
15413 return (NULL);
15414 }
15415 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15416 if (stream == NULL) {
15417 xmlFreeParserInputBuffer(input);
15418 xmlFreeParserCtxt(ctxt);
15419 return (NULL);
15420 }
15421 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015422 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015423}
15424
15425/**
15426 * xmlCtxtReadDoc:
15427 * @ctxt: an XML parser context
15428 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015429 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015430 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015431 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015432 *
15433 * parse an XML in-memory document and build a tree.
15434 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015435 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015436 * Returns the resulting document tree
15437 */
15438xmlDocPtr
15439xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015440 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015441{
15442 xmlParserInputPtr stream;
15443
15444 if (cur == NULL)
15445 return (NULL);
15446 if (ctxt == NULL)
15447 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015448 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015449
15450 xmlCtxtReset(ctxt);
15451
15452 stream = xmlNewStringInputStream(ctxt, cur);
15453 if (stream == NULL) {
15454 return (NULL);
15455 }
15456 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015457 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015458}
15459
15460/**
15461 * xmlCtxtReadFile:
15462 * @ctxt: an XML parser context
15463 * @filename: a file or URL
15464 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015465 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015466 *
15467 * parse an XML file from the filesystem or the network.
15468 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015469 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015470 * Returns the resulting document tree
15471 */
15472xmlDocPtr
15473xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15474 const char *encoding, int options)
15475{
15476 xmlParserInputPtr stream;
15477
15478 if (filename == NULL)
15479 return (NULL);
15480 if (ctxt == NULL)
15481 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015482 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015483
15484 xmlCtxtReset(ctxt);
15485
Daniel Veillard29614c72004-11-26 10:47:26 +000015486 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015487 if (stream == NULL) {
15488 return (NULL);
15489 }
15490 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015491 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015492}
15493
15494/**
15495 * xmlCtxtReadMemory:
15496 * @ctxt: an XML parser context
15497 * @buffer: a pointer to a char array
15498 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015499 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015500 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015501 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015502 *
15503 * parse an XML in-memory document and build a tree.
15504 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015505 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015506 * Returns the resulting document tree
15507 */
15508xmlDocPtr
15509xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015510 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015511{
15512 xmlParserInputBufferPtr input;
15513 xmlParserInputPtr stream;
15514
15515 if (ctxt == NULL)
15516 return (NULL);
15517 if (buffer == NULL)
15518 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015519 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015520
15521 xmlCtxtReset(ctxt);
15522
15523 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15524 if (input == NULL) {
15525 return(NULL);
15526 }
15527
15528 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15529 if (stream == NULL) {
15530 xmlFreeParserInputBuffer(input);
15531 return(NULL);
15532 }
15533
15534 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015535 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015536}
15537
15538/**
15539 * xmlCtxtReadFd:
15540 * @ctxt: an XML parser context
15541 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015542 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015543 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015544 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015545 *
15546 * parse an XML from a file descriptor and build a tree.
15547 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015548 * NOTE that the file descriptor will not be closed when the
15549 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015550 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015551 * Returns the resulting document tree
15552 */
15553xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015554xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15555 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015556{
15557 xmlParserInputBufferPtr input;
15558 xmlParserInputPtr stream;
15559
15560 if (fd < 0)
15561 return (NULL);
15562 if (ctxt == NULL)
15563 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015564 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015565
15566 xmlCtxtReset(ctxt);
15567
15568
15569 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15570 if (input == NULL)
15571 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015572 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015573 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15574 if (stream == NULL) {
15575 xmlFreeParserInputBuffer(input);
15576 return (NULL);
15577 }
15578 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015579 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015580}
15581
15582/**
15583 * xmlCtxtReadIO:
15584 * @ctxt: an XML parser context
15585 * @ioread: an I/O read function
15586 * @ioclose: an I/O close function
15587 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015588 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015589 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015590 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015591 *
15592 * parse an XML document from I/O functions and source and build a tree.
15593 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015594 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015595 * Returns the resulting document tree
15596 */
15597xmlDocPtr
15598xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15599 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015600 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015601 const char *encoding, int options)
15602{
15603 xmlParserInputBufferPtr input;
15604 xmlParserInputPtr stream;
15605
15606 if (ioread == NULL)
15607 return (NULL);
15608 if (ctxt == NULL)
15609 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015610 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015611
15612 xmlCtxtReset(ctxt);
15613
15614 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15615 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015616 if (input == NULL) {
15617 if (ioclose != NULL)
15618 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015619 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015620 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015621 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15622 if (stream == NULL) {
15623 xmlFreeParserInputBuffer(input);
15624 return (NULL);
15625 }
15626 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015627 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015628}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015629
15630#define bottom_parser
15631#include "elfgcchack.h"